{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -*-encoding:UTF-8-*-\n",
    "import csv\n",
    "import pandas as pd\n",
    "import pickle\n",
    "import itertools\n",
    "import datetime\n",
    "import numpy as np\n",
    "import scipy.io as sio\n",
    "import scipy.sparse as ss\n",
    "import scipy.spatial.distance as ssd\n",
    "from collections import defaultdict\n",
    "from sklearn.preprocessing import normalize\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn import metrics\n",
    "import time\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3137972, 110)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# train.csv / test.csv are also parsed line by line in a later cell to collect user and event ids.\n",
    "train = pd.read_csv('train.csv')\n",
    "test = pd.read_csv('test.csv')\n",
    "\n",
    "# NOTE: despite its name, ftrain is loaded from events.csv, not from train.csv.\n",
    "ftrain = pd.read_csv('events.csv')\n",
    "# Last expression of the cell: shown as the cell output in (rows, columns) form.\n",
    "ftrain.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>start_time</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>zip</th>\n",
       "      <th>country</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>c_1</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>684921758</td>\n",
       "      <td>3647864012</td>\n",
       "      <td>2012-10-31T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>244999119</td>\n",
       "      <td>3476440521</td>\n",
       "      <td>2012-11-03T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3928440935</td>\n",
       "      <td>517514445</td>\n",
       "      <td>2012-11-05T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2582345152</td>\n",
       "      <td>781585781</td>\n",
       "      <td>2012-10-30T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1051165850</td>\n",
       "      <td>1016098580</td>\n",
       "      <td>2012-09-27T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1212611096</td>\n",
       "      <td>1426522332</td>\n",
       "      <td>2012-11-16T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3689283674</td>\n",
       "      <td>725266702</td>\n",
       "      <td>2012-11-02T20:00:00.003Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2584113432</td>\n",
       "      <td>613687941</td>\n",
       "      <td>2012-10-31T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>3365728297</td>\n",
       "      <td>1098509207</td>\n",
       "      <td>2012-10-31T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>47.058</td>\n",
       "      <td>21.926</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2912638473</td>\n",
       "      <td>3598071768</td>\n",
       "      <td>2012-10-18T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 110 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     event_id     user_id                start_time city state  zip country  \\\n",
       "0   684921758  3647864012  2012-10-31T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "1   244999119  3476440521  2012-11-03T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "2  3928440935   517514445  2012-11-05T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "3  2582345152   781585781  2012-10-30T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "4  1051165850  1016098580  2012-09-27T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "5  1212611096  1426522332  2012-11-16T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "6  3689283674   725266702  2012-11-02T20:00:00.003Z  NaN   NaN  NaN     NaN   \n",
       "7  2584113432   613687941  2012-10-31T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "8  3365728297  1098509207  2012-10-31T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "9  2912638473  3598071768  2012-10-18T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "\n",
       "      lat     lng  c_1   ...     c_92  c_93  c_94  c_95  c_96  c_97  c_98  \\\n",
       "0     NaN     NaN    2   ...        0     1     0     0     0     0     0   \n",
       "1     NaN     NaN    2   ...        0     0     0     0     0     0     0   \n",
       "2     NaN     NaN    0   ...        0     0     0     0     0     0     0   \n",
       "3     NaN     NaN    1   ...        0     0     0     0     0     0     0   \n",
       "4     NaN     NaN    1   ...        0     0     0     0     0     0     0   \n",
       "5     NaN     NaN    0   ...        0     0     0     0     0     0     0   \n",
       "6     NaN     NaN    0   ...        0     0     0     0     0     0     0   \n",
       "7     NaN     NaN    0   ...        2     0     0     0     0     0     0   \n",
       "8  47.058  21.926    0   ...        0     0     0     0     0     0     0   \n",
       "9     NaN     NaN    1   ...        0     0     0     0     0     0     0   \n",
       "\n",
       "   c_99  c_100  c_other  \n",
       "0     0      0        9  \n",
       "1     0      0        7  \n",
       "2     0      0       12  \n",
       "3     0      0        8  \n",
       "4     0      0        9  \n",
       "5     0      0       22  \n",
       "6     0      0       28  \n",
       "7     0      0      354  \n",
       "8     1      0       25  \n",
       "9     0      0        3  \n",
       "\n",
       "[10 rows x 110 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ftrain.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "events = ftrain['event_id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "source": [
    "# Print both values: a bare `events.shape` that is not the cell's last expression is never displayed.\n",
    "print(events.shape)\n",
    "print(type(events))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of uniqueUsers :3391\n",
      "number of uniqueEvents :13418\n"
     ]
    }
   ],
   "source": [
    "# Collect the distinct user ids and event ids that appear in train.csv and test.csv.\n",
    "uniqueUsers = set()   # distinct user ids, kept as the raw strings read from the files\n",
    "uniqueEvents = set()  # distinct event ids, kept as the raw strings read from the files\n",
    "\n",
    "# Inverted indexes. They are only declared here: the two lines that would fill them\n",
    "# are commented out inside the loop, so both stay empty after this cell runs.\n",
    "eventsForUser = defaultdict(set)  # user id -> set of event ids\n",
    "usersForEvent = defaultdict(set)  # event id -> set of user ids\n",
    "\n",
    "for filename in [\"train.csv\", \"test.csv\"]:\n",
    "    # The context manager closes the file even if parsing a line raises.\n",
    "    with open(filename, 'r') as f:\n",
    "        # Skip the header row (column names); the default keeps an empty file from raising.\n",
    "        next(f, None)\n",
    "\n",
    "        for line in f:  # one record per line\n",
    "            cols = line.strip().split(\",\")\n",
    "            if len(cols) < 2:\n",
    "                # Blank or truncated line: there is no user/event pair to record.\n",
    "                continue\n",
    "            uniqueUsers.add(cols[0])   # first column: user id\n",
    "            uniqueEvents.add(cols[1])  # second column: event id\n",
    "\n",
    "            #eventsForUser[cols[0]].add(cols[1])    # record the event under this user\n",
    "            #usersForEvent[cols[1]].add(cols[0])    # record the user under this event\n",
    "\n",
    "n_uniqueUsers = len(uniqueUsers)\n",
    "n_uniqueEvents = len(uniqueEvents)\n",
    "\n",
    "print(\"number of uniqueUsers :%d\" % n_uniqueUsers)\n",
    "print(\"number of uniqueEvents :%d\" % n_uniqueEvents)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "uniqueEvents = list(uniqueEvents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'list'>\n"
     ]
    }
   ],
   "source": [
    "print(type(uniqueEvents))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "利用 isin() 函数生成布尔掩码 eventT：标记 ftrain（events.csv）中 event_id 出现在 uniqueEvents 里的行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# uniqueEvents holds ids as strings (parsed from raw CSV text) while ftrain['event_id'] is int64.\n",
    "# pandas' isin() does not match '123' against 123, so convert the ids to int before the lookup.\n",
    "eventT = ftrain['event_id'].isin([int(event_id) for event_id in uniqueEvents])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "selected即是train和test出现的事件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>start_time</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>zip</th>\n",
       "      <th>country</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>c_1</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>684921758</td>\n",
       "      <td>3647864012</td>\n",
       "      <td>2012-10-31T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>244999119</td>\n",
       "      <td>3476440521</td>\n",
       "      <td>2012-11-03T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3928440935</td>\n",
       "      <td>517514445</td>\n",
       "      <td>2012-11-05T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2582345152</td>\n",
       "      <td>781585781</td>\n",
       "      <td>2012-10-30T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1051165850</td>\n",
       "      <td>1016098580</td>\n",
       "      <td>2012-09-27T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 110 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     event_id     user_id                start_time city state  zip country  \\\n",
       "0   684921758  3647864012  2012-10-31T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "1   244999119  3476440521  2012-11-03T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "2  3928440935   517514445  2012-11-05T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "3  2582345152   781585781  2012-10-30T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "4  1051165850  1016098580  2012-09-27T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "\n",
       "   lat  lng  c_1   ...     c_92  c_93  c_94  c_95  c_96  c_97  c_98  c_99  \\\n",
       "0  NaN  NaN    2   ...        0     1     0     0     0     0     0     0   \n",
       "1  NaN  NaN    2   ...        0     0     0     0     0     0     0     0   \n",
       "2  NaN  NaN    0   ...        0     0     0     0     0     0     0     0   \n",
       "3  NaN  NaN    1   ...        0     0     0     0     0     0     0     0   \n",
       "4  NaN  NaN    1   ...        0     0     0     0     0     0     0     0   \n",
       "\n",
       "   c_100  c_other  \n",
       "0      0        9  \n",
       "1      0        7  \n",
       "2      0       12  \n",
       "3      0        8  \n",
       "4      0        9  \n",
       "\n",
       "[5 rows x 110 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected = ftrain[eventT]\n",
    "selected.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(13418, 110)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 13418 entries, 0 to 3137701\n",
      "Columns: 110 entries, event_id to c_other\n",
      "dtypes: float64(2), int64(103), object(5)\n",
      "memory usage: 11.4+ MB\n"
     ]
    }
   ],
   "source": [
    "selected.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>c_1</th>\n",
       "      <th>c_2</th>\n",
       "      <th>c_3</th>\n",
       "      <th>c_4</th>\n",
       "      <th>c_5</th>\n",
       "      <th>c_6</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1.341800e+04</td>\n",
       "      <td>1.341800e+04</td>\n",
       "      <td>8062.000000</td>\n",
       "      <td>8062.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.140873e+09</td>\n",
       "      <td>2.134713e+09</td>\n",
       "      <td>25.727517</td>\n",
       "      <td>-24.807209</td>\n",
       "      <td>2.359964</td>\n",
       "      <td>1.464972</td>\n",
       "      <td>1.323372</td>\n",
       "      <td>0.888732</td>\n",
       "      <td>1.159711</td>\n",
       "      <td>2.479654</td>\n",
       "      <td>...</td>\n",
       "      <td>0.064913</td>\n",
       "      <td>0.083992</td>\n",
       "      <td>0.093755</td>\n",
       "      <td>0.070502</td>\n",
       "      <td>0.082427</td>\n",
       "      <td>0.233790</td>\n",
       "      <td>0.082874</td>\n",
       "      <td>0.076837</td>\n",
       "      <td>0.073558</td>\n",
       "      <td>57.554777</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.232469e+09</td>\n",
       "      <td>1.254357e+09</td>\n",
       "      <td>21.162472</td>\n",
       "      <td>91.900619</td>\n",
       "      <td>19.331141</td>\n",
       "      <td>2.959769</td>\n",
       "      <td>2.720104</td>\n",
       "      <td>1.972209</td>\n",
       "      <td>15.695718</td>\n",
       "      <td>7.375475</td>\n",
       "      <td>...</td>\n",
       "      <td>0.309890</td>\n",
       "      <td>0.377730</td>\n",
       "      <td>0.388404</td>\n",
       "      <td>0.312148</td>\n",
       "      <td>0.503164</td>\n",
       "      <td>15.553234</td>\n",
       "      <td>0.356777</td>\n",
       "      <td>0.455338</td>\n",
       "      <td>0.337954</td>\n",
       "      <td>110.916584</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.040700e+05</td>\n",
       "      <td>1.329876e+06</td>\n",
       "      <td>-86.151000</td>\n",
       "      <td>-157.991000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.081551e+09</td>\n",
       "      <td>1.027696e+09</td>\n",
       "      <td>3.608000</td>\n",
       "      <td>-96.886500</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>14.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2.122509e+09</td>\n",
       "      <td>2.150758e+09</td>\n",
       "      <td>34.040000</td>\n",
       "      <td>-76.794000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>38.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>3.206782e+09</td>\n",
       "      <td>3.220623e+09</td>\n",
       "      <td>42.983750</td>\n",
       "      <td>98.656750</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>75.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.294677e+09</td>\n",
       "      <td>4.294033e+09</td>\n",
       "      <td>61.498000</td>\n",
       "      <td>174.777000</td>\n",
       "      <td>2186.000000</td>\n",
       "      <td>82.000000</td>\n",
       "      <td>85.000000</td>\n",
       "      <td>71.000000</td>\n",
       "      <td>1801.000000</td>\n",
       "      <td>306.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>1801.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>16.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>9664.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 105 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           event_id       user_id          lat          lng           c_1  \\\n",
       "count  1.341800e+04  1.341800e+04  8062.000000  8062.000000  13418.000000   \n",
       "mean   2.140873e+09  2.134713e+09    25.727517   -24.807209      2.359964   \n",
       "std    1.232469e+09  1.254357e+09    21.162472    91.900619     19.331141   \n",
       "min    1.040700e+05  1.329876e+06   -86.151000  -157.991000      0.000000   \n",
       "25%    1.081551e+09  1.027696e+09     3.608000   -96.886500      0.000000   \n",
       "50%    2.122509e+09  2.150758e+09    34.040000   -76.794000      1.000000   \n",
       "75%    3.206782e+09  3.220623e+09    42.983750    98.656750      3.000000   \n",
       "max    4.294677e+09  4.294033e+09    61.498000   174.777000   2186.000000   \n",
       "\n",
       "                c_2           c_3           c_4           c_5           c_6  \\\n",
       "count  13418.000000  13418.000000  13418.000000  13418.000000  13418.000000   \n",
       "mean       1.464972      1.323372      0.888732      1.159711      2.479654   \n",
       "std        2.959769      2.720104      1.972209     15.695718      7.375475   \n",
       "min        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "25%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "50%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "75%        2.000000      2.000000      1.000000      1.000000      2.000000   \n",
       "max       82.000000     85.000000     71.000000   1801.000000    306.000000   \n",
       "\n",
       "           ...               c_92          c_93          c_94          c_95  \\\n",
       "count      ...       13418.000000  13418.000000  13418.000000  13418.000000   \n",
       "mean       ...           0.064913      0.083992      0.093755      0.070502   \n",
       "std        ...           0.309890      0.377730      0.388404      0.312148   \n",
       "min        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "25%        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "50%        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "75%        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "max        ...           7.000000      9.000000     10.000000      9.000000   \n",
       "\n",
       "               c_96          c_97          c_98          c_99         c_100  \\\n",
       "count  13418.000000  13418.000000  13418.000000  13418.000000  13418.000000   \n",
       "mean       0.082427      0.233790      0.082874      0.076837      0.073558   \n",
       "std        0.503164     15.553234      0.356777      0.455338      0.337954   \n",
       "min        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "25%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "50%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "75%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "max       23.000000   1801.000000      9.000000     16.000000      7.000000   \n",
       "\n",
       "            c_other  \n",
       "count  13418.000000  \n",
       "mean      57.554777  \n",
       "std      110.916584  \n",
       "min        0.000000  \n",
       "25%       14.000000  \n",
       "50%       38.000000  \n",
       "75%       75.000000  \n",
       "max     9664.000000  \n",
       "\n",
       "[8 rows x 105 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "event_id          0\n",
       "user_id           0\n",
       "start_time        0\n",
       "city           7092\n",
       "state          8868\n",
       "zip           12004\n",
       "country        7065\n",
       "lat            5356\n",
       "lng            5356\n",
       "c_1               0\n",
       "c_2               0\n",
       "c_3               0\n",
       "c_4               0\n",
       "c_5               0\n",
       "c_6               0\n",
       "c_7               0\n",
       "c_8               0\n",
       "c_9               0\n",
       "c_10              0\n",
       "c_11              0\n",
       "c_12              0\n",
       "c_13              0\n",
       "c_14              0\n",
       "c_15              0\n",
       "c_16              0\n",
       "c_17              0\n",
       "c_18              0\n",
       "c_19              0\n",
       "c_20              0\n",
       "c_21              0\n",
       "              ...  \n",
       "c_72              0\n",
       "c_73              0\n",
       "c_74              0\n",
       "c_75              0\n",
       "c_76              0\n",
       "c_77              0\n",
       "c_78              0\n",
       "c_79              0\n",
       "c_80              0\n",
       "c_81              0\n",
       "c_82              0\n",
       "c_83              0\n",
       "c_84              0\n",
       "c_85              0\n",
       "c_86              0\n",
       "c_87              0\n",
       "c_88              0\n",
       "c_89              0\n",
       "c_90              0\n",
       "c_91              0\n",
       "c_92              0\n",
       "c_93              0\n",
       "c_94              0\n",
       "c_95              0\n",
       "c_96              0\n",
       "c_97              0\n",
       "c_98              0\n",
       "c_99              0\n",
       "c_100             0\n",
       "c_other           0\n",
       "Length: 110, dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 获取标签y值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of labelled rows to keep; 13418 equals the row count reported by selected.describe().\n",
    "n_train= 13418\n",
    "# NOTE(review): takes the first n_train values of train.interested by position; assumes the row order matches `selected` -- confirm.\n",
    "y_train = train.interested.values[:n_train]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the origin shape of the trian(13418, 110)\n",
      "y shape:(13418,)\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: report the shapes of the feature frame and of the label vector.\n",
    "# NOTE(review): X_train is assigned in a later cell of this notebook; confirm it is also defined before this cell on a fresh top-to-bottom run.\n",
    "print(\"the origin shape of the trian{}\".format(X_train.shape))\n",
    "print('y shape:{}'.format(y_train.shape))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\envs\\py3\\lib\\site-packages\\ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  after removing the cwd from sys.path.\n",
      "D:\\Anaconda3\\envs\\py3\\lib\\site-packages\\ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \n",
      "D:\\Anaconda3\\envs\\py3\\lib\\site-packages\\ipykernel_launcher.py:8: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \n",
      "D:\\Anaconda3\\envs\\py3\\lib\\site-packages\\ipykernel_launcher.py:10: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  # Remove the CWD from sys.path while we load stuff.\n"
     ]
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "# Work on an independent copy so the column assignments below do not write\n",
    "# into a slice of another DataFrame (which raised SettingWithCopyWarning).\n",
    "selected = selected.copy()\n",
    "\n",
    "# Replace each location column with integer codes. The encoder is re-fitted\n",
    "# for every column, so a code is only meaningful within its own column.\n",
    "# NOTE(review): these columns contain missing values (see the isnull() counts); confirm how NaN should be encoded.\n",
    "LE = LabelEncoder()\n",
    "for col in ['city', 'state', 'zip', 'country']:\n",
    "    raw_values = list(selected[col].values)\n",
    "    selected[col] = LE.fit_transform(raw_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>start_time</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>zip</th>\n",
       "      <th>country</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>c_1</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>684921758</td>\n",
       "      <td>3647864012</td>\n",
       "      <td>2012-10-31T00:00:00.001Z</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>244999119</td>\n",
       "      <td>3476440521</td>\n",
       "      <td>2012-11-03T00:00:00.001Z</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3928440935</td>\n",
       "      <td>517514445</td>\n",
       "      <td>2012-11-05T00:00:00.001Z</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2582345152</td>\n",
       "      <td>781585781</td>\n",
       "      <td>2012-10-30T00:00:00.001Z</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1051165850</td>\n",
       "      <td>1016098580</td>\n",
       "      <td>2012-09-27T00:00:00.001Z</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 110 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     event_id     user_id                start_time  city  state  zip  \\\n",
       "0   684921758  3647864012  2012-10-31T00:00:00.001Z  1263    102  919   \n",
       "1   244999119  3476440521  2012-11-03T00:00:00.001Z  1263    102  919   \n",
       "2  3928440935   517514445  2012-11-05T00:00:00.001Z  1263    102  919   \n",
       "3  2582345152   781585781  2012-10-30T00:00:00.001Z  1263    102  919   \n",
       "4  1051165850  1016098580  2012-09-27T00:00:00.001Z  1263    102  919   \n",
       "\n",
       "   country  lat  lng  c_1   ...     c_92  c_93  c_94  c_95  c_96  c_97  c_98  \\\n",
       "0       94  NaN  NaN    2   ...        0     1     0     0     0     0     0   \n",
       "1       94  NaN  NaN    2   ...        0     0     0     0     0     0     0   \n",
       "2       94  NaN  NaN    0   ...        0     0     0     0     0     0     0   \n",
       "3       94  NaN  NaN    1   ...        0     0     0     0     0     0     0   \n",
       "4       94  NaN  NaN    1   ...        0     0     0     0     0     0     0   \n",
       "\n",
       "   c_99  c_100  c_other  \n",
       "0     0      0        9  \n",
       "1     0      0        7  \n",
       "2     0      0       12  \n",
       "3     0      0        8  \n",
       "4     0      0        9  \n",
       "\n",
       "[5 rows x 110 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\envs\\py3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n",
      "D:\\Anaconda3\\envs\\py3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \n",
      "D:\\Anaconda3\\envs\\py3\\lib\\site-packages\\ipykernel_launcher.py:3: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  This is separate from the ipykernel package so we can avoid doing imports until\n",
      "D:\\Anaconda3\\envs\\py3\\lib\\site-packages\\ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  after removing the cwd from sys.path.\n",
      "D:\\Anaconda3\\envs\\py3\\lib\\site-packages\\ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>zip</th>\n",
       "      <th>country</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>c_1</th>\n",
       "      <th>c_2</th>\n",
       "      <th>...</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>hour</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>684921758</td>\n",
       "      <td>3647864012</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2012</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>244999119</td>\n",
       "      <td>3476440521</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>2012</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3928440935</td>\n",
       "      <td>517514445</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>2012</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2582345152</td>\n",
       "      <td>781585781</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>2012</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1051165850</td>\n",
       "      <td>1016098580</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2012</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 112 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     event_id     user_id  city  state  zip  country  lat  lng  c_1  c_2  \\\n",
       "0   684921758  3647864012  1263    102  919       94  NaN  NaN    2    0   \n",
       "1   244999119  3476440521  1263    102  919       94  NaN  NaN    2    0   \n",
       "2  3928440935   517514445  1263    102  919       94  NaN  NaN    0    0   \n",
       "3  2582345152   781585781  1263    102  919       94  NaN  NaN    1    0   \n",
       "4  1051165850  1016098580  1263    102  919       94  NaN  NaN    1    1   \n",
       "\n",
       "   ...   c_95  c_96  c_97  c_98  c_99  c_100  c_other  year  month  hour  \n",
       "0  ...      0     0     0     0     0      0        9  2012     10     0  \n",
       "1  ...      0     0     0     0     0      0        7  2012     11     0  \n",
       "2  ...      0     0     0     0     0      0       12  2012     11     0  \n",
       "3  ...      0     0     0     0     0      0        8  2012     10     0  \n",
       "4  ...      0     0     0     0     0      0        9  2012      9     0  \n",
       "\n",
       "[5 rows x 112 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parse the start timestamps once and keep the parsed Series local instead\n",
    "# of storing it as a temporary 'date' column that has to be dropped again.\n",
    "start_dt = pd.to_datetime(selected['start_time'])\n",
    "\n",
    "# Non-in-place drop plus an explicit copy: the assignments below then act on\n",
    "# an independent frame rather than a slice (no SettingWithCopyWarning).\n",
    "selected = selected.drop(['start_time'], axis=1).copy()\n",
    "selected['year'] = start_dt.dt.year\n",
    "selected['month'] = start_dt.dt.month\n",
    "selected['hour'] = start_dt.dt.hour\n",
    "selected.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 处理lat,lng缺失值(空值还是比较多，尝试使用mean值去代替)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\envs\\py3\\lib\\site-packages\\ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  after removing the cwd from sys.path.\n"
     ]
    }
   ],
   "source": [
    "miss_data = ['lat','lng']\n",
    "# Impute missing coordinates with the exact column mean (NaN is skipped by\n",
    "# mean() by default). The earlier int() cast truncated the mean, e.g. a lat\n",
    "# mean of 25.73 was filled in as 25, and np.NaN is not available in NumPy 2.\n",
    "column_means = {column: selected[column].mean() for column in miss_data}\n",
    "# fillna with a dict fills each listed column with its own value and returns\n",
    "# a new frame, so nothing is written into a slice of another DataFrame.\n",
    "selected = selected.fillna(column_means)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>zip</th>\n",
       "      <th>country</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>c_1</th>\n",
       "      <th>c_2</th>\n",
       "      <th>...</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>hour</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>684921758</td>\n",
       "      <td>3647864012</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>25.0</td>\n",
       "      <td>-24.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2012</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>244999119</td>\n",
       "      <td>3476440521</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>25.0</td>\n",
       "      <td>-24.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>2012</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3928440935</td>\n",
       "      <td>517514445</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>25.0</td>\n",
       "      <td>-24.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>2012</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2582345152</td>\n",
       "      <td>781585781</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>25.0</td>\n",
       "      <td>-24.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>2012</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1051165850</td>\n",
       "      <td>1016098580</td>\n",
       "      <td>1263</td>\n",
       "      <td>102</td>\n",
       "      <td>919</td>\n",
       "      <td>94</td>\n",
       "      <td>25.0</td>\n",
       "      <td>-24.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2012</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 112 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     event_id     user_id  city  state  zip  country   lat   lng  c_1  c_2  \\\n",
       "0   684921758  3647864012  1263    102  919       94  25.0 -24.0    2    0   \n",
       "1   244999119  3476440521  1263    102  919       94  25.0 -24.0    2    0   \n",
       "2  3928440935   517514445  1263    102  919       94  25.0 -24.0    0    0   \n",
       "3  2582345152   781585781  1263    102  919       94  25.0 -24.0    1    0   \n",
       "4  1051165850  1016098580  1263    102  919       94  25.0 -24.0    1    1   \n",
       "\n",
       "   ...   c_95  c_96  c_97  c_98  c_99  c_100  c_other  year  month  hour  \n",
       "0  ...      0     0     0     0     0      0        9  2012     10     0  \n",
       "1  ...      0     0     0     0     0      0        7  2012     11     0  \n",
       "2  ...      0     0     0     0     0      0       12  2012     11     0  \n",
       "3  ...      0     0     0     0     0      0        8  2012     10     0  \n",
       "4  ...      0     0     0     0     0      0        9  2012      9     0  \n",
       "\n",
       "[5 rows x 112 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = selected.drop(['event_id','user_id','city','state','zip','country','lat','lng','year','month','hour'],axis=1)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 分割测试集与训练集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train_part, X_val, y_train_part, y_val = train_test_split(X_train, y_train, train_size=0.8, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10734, 101)\n",
      "(2684, 101)\n"
     ]
    }
   ],
   "source": [
    "print(X_train_part.shape)\n",
    "print(X_val.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 聚类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "def k_cluster_analysis(K, X_train, y_train, X_val, y_val):\n",
    "    start = time.time()\n",
    "    print('K-means begin with clusters:{}'.format(K));\n",
    "    \n",
    "    #K-means在训练集训练\n",
    "    mb_kmeans = MiniBatchKMeans(n_clusters = K)\n",
    "    mb_kmeans.fit(X_train)\n",
    "    \n",
    "    #在训练集和测试集上测试\n",
    "    y_train_pred = mb_kmeans.fit_predict(X_train)\n",
    "    y_val_pred = mb_kmeans.fit_predict(X_val)\n",
    "    \n",
    "    #K值评估标准（内部）\n",
    "    #轮廓系数silhouette Coefficient和Calinski-Harabasz Index\n",
    "    #分数越大越好\n",
    "    #CH_score1 = metrics.calinski_harabaz_score(X_train, mb_kmeans.predict(X_train))\n",
    "    CH_score2 =metrics.silhouette_score(X_train, mb_kmeans.predict(X_train))\n",
    "    #在校验集上评估(外部)\n",
    "    v_score = metrics.v_measure_score(y_val, y_val_pred)\n",
    "    \n",
    "    \n",
    "    end = time.time()\n",
    "    print('using time: ', str(end - start))\n",
    "    #print('CH_score1:{}'.format(CH_score1))\n",
    "    print('CH_score2: {}\\n'.format(CH_score2))\n",
    "    print('v_score: {}\\n'.format(v_score))\n",
    "    \n",
    "    return CH_score2,v_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 设置超参数（聚类数目K）搜索范围"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters:3\n",
      "using time:  9.34853458404541\n",
      "CH_score2: 0.560626426938096\n",
      "\n",
      "v_score: 2.449568987309362e-05\n",
      "\n",
      "K-means begin with clusters:4\n",
      "using time:  9.203526496887207\n",
      "CH_score2: 0.5903632009124139\n",
      "\n",
      "v_score: 0.0005491831227680388\n",
      "\n",
      "K-means begin with clusters:5\n",
      "using time:  8.52948784828186\n",
      "CH_score2: 0.5335247354882774\n",
      "\n",
      "v_score: 0.00036811576443225027\n",
      "\n",
      "K-means begin with clusters:6\n",
      "using time:  8.791502714157104\n",
      "CH_score2: 0.42525163604223604\n",
      "\n",
      "v_score: 0.0004547165451451449\n",
      "\n",
      "K-means begin with clusters:7\n",
      "using time:  8.140465497970581\n",
      "CH_score2: 0.5052382926127709\n",
      "\n",
      "v_score: 0.0007690941899907622\n",
      "\n",
      "K-means begin with clusters:8\n",
      "using time:  8.366478443145752\n",
      "CH_score2: 0.4506379888160199\n",
      "\n",
      "v_score: 0.0016421807789270193\n",
      "\n",
      "K-means begin with clusters:9\n",
      "using time:  7.984456539154053\n",
      "CH_score2: 0.42187138520143186\n",
      "\n",
      "v_score: 0.0019389027063735902\n",
      "\n",
      "K-means begin with clusters:10\n",
      "using time:  7.861449718475342\n",
      "CH_score2: 0.36327059562357167\n",
      "\n",
      "v_score: 0.0015225408987396608\n",
      "\n"
     ]
    }
   ],
   "source": [
    "Ks = [3, 4, 5, 6, 7, 8, 9, 10]\n",
    "CH_scores2 = []\n",
    "v_scores = []\n",
    "for K in Ks:\n",
    "    ch, v = k_cluster_analysis(K, X_train_part, y_train_part, X_val, y_val)\n",
    "    CH_scores2.append(ch)\n",
    "    v_scores.append(v)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "n_cluster=4,CH_score=0.40分数最高,n_cluster=9,v_score分数最高(这时候数据是选择全部特征)\n",
    "n_cluster=4,CH_score=0.59分数最高,n_cluster=9,v_score分数最高(只选择c_1到c_9特征)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0xc585518>]"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD8CAYAAABw1c+bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xm81nP6x/HX1amo7EpMySmTkpAcjW0i2xSRJck2ZGiy\nhLQIMxjLCCE7/YwlSYytrDEIYzAtQmmdJDW02Ivi6PP747rPOGk5933Ofd+fe3k/H48enXN3L++T\nXPf3/nyv7/WxEAIiIlI8asUOICIi2aXCLyJSZFT4RUSKjAq/iEiRUeEXESkyKvwiIkVGhV9EpMgk\nVfjNrLOZzTSzOWY2eB332d/MppjZNDN7LZXHiohI9lhVF3CZWQkwCzgYWABMAI4PIXxY6T6bAf8C\nOocQ5pvZViGExck8VkREsqt2EvfpAMwJIcwFMLPRQDegcvE+AXgihDAfIISwOIXHrqFhw4ahtLQ0\nhR9DRKS4TZo0aWkIoVEy902m8DcBPqn0/QLgN7+4zw5AHTMbD2wM3BxCGJHkY9dQWlrKxIkTk4gm\nIiIAZvZxsvdNpvAn+zy7AwcC9YC3zOztVJ7AzHoDvQGaNWuWplgiIvJLyZzcXQhsW+n7ponbKlsA\njAshLA8hLAVeB3ZN8rEAhBCGhxDKQghljRol9WlFRESqIZnCPwFoaWbNzawu0BMY+4v7jAH2NbPa\nZlYfX86ZnuRjRUQki6pc6gkhlJvZOcA4oAS4N4Qwzcz6JP78rhDCdDN7AXgfWAXcE0KYCrC2x2bo\nZxERkSRU2c4ZQ1lZWdDJXRGR5JnZpBBCWTL31ZW7IiJFRoVfRKTIqPDH8t57cP/9kINLbSJS2FT4\ns+3LL+Gcc6B9e+jVC664InYiESky6bqAS6qyahXcey9cdBF88QWcdRZ8/TVcfjk0a+ZvAiIiWaDC\nnw0TJsDZZ/vv++4Lt90Gu+4KP/4In30GvXtDkyZwyCGxk4pIEdBSTyYtXepF/Te/gU8+gQcfhNdf\n96IPUKcOPPYY7LQTHHMMTJkSN6+IFAUV/kz46Se4807YYQdf3unXD2bOhJNOArPV77vJJvDss7DZ\nZnDooTB/fpzMIlI0VPjT7V//grIyX8Nv1867d264wQv8ujRpAs8/D99958X/q6+yl1dEio4Kf7os\nWgSnngr77ANLlsAjj8DLL/syTjLatoUnn4RZs+Coo2DlyozGFZHipcJfU+XlcPPNvqwzahQMHgwz\nZkCPHmsu61SlUye47z4YPx5OO807gURE0kxdPTXx2mvekz91Kvzud3DLLf4GUBMnnujr/Bdf7G2e\n11yTnqwiIgkq/NWxcCEMHAgPPwzbbedLNN26pX6Evy6DB8PHH8OQIf78ffqk53lFRFDhT80PP/iy\nzhVXeA/+pZfChRdC/frpfR0z7/VfuND7/5s0gcMPT+9riEjR0hp/sl56CXbZBQYN8rX4Dz+Ev/wl\n/UW/Qu3aMHq0j3bo2dMv/hIRSQMV/qrMnw/du/tVteXl8MwzMHYstGiR+ddu0MBfr3Fj6NoV5s7N\n/GuKSMFT4V+XFSvg6quhdWt47jm46io/iXvYYdnN0bix9/iXl0OXLvD559l9fREpOCr8a/Pss95X\n/6c/+QVVM2bAJZfAhhvGydOqlX/K+PhjOOII+P77ODlEpCCo8Fc2d64X1q5dfY39xRd9lk6zZrGT\n+YVhI0fCW2/BySf7WAgRkWpQ4QcflXDZZdCmDbzyClx3Hbz/Phx8cOxkq+ve3cc/PP44DBgQO42I\n5KnibucMAcaMgfPP92WU44+H66/39slcdf75MG8eDBvmPf7nnx87kYjkmeIt/LNmwbnnwrhxvp4/\nfjzst1/sVFUzgxtv9DHPF1wA227rI51FRJJUfEs9y5b5Llht2/p6+bBhMHlyfhT9CiUl8NBDsOee\nPuLhzTdjJxKRPFI8hT8EePRR2HFHH4
Vwwgl+1H/eeb4hSr6pV887fZo18xPSM2fGTiQieaI4Cv+0\naXDggXDccdCokR8h33+/98jns4YNvce/pMR7/Bctip1IRPJAYRf+b76B/v19Q5QpU+COO3z0wd57\nx06WPttv71f3fvaZt6EuXx47kYjkuMIs/CF4z3urVnDTTdCrly/rnHmmHx0Xmg4dfK7P5Mk+16e8\nPHYiEclhhVf433sPOnb0i5yaNYN33oHhw31ZpJAdcQTceqsf/fft629+IiJrUTjtnMuX+xz7O+6A\nLbaAe+7xI/1ahffetk5nneXXI1x3nff4Dx4cO5GI5KDCKfx16sCrr/pyzhVXePEvRtdc4xNFL7rI\nP/GccELsRCKSYwqn8NetCxMnxhuklitq1fKOpU8/9c3ft9nG9w8QEUkorHWQYi/6FTbYwLeDbNkS\njjrKx0mLiCQUVuGXn22+ue8jUK+ej5b+739jJxKRHKHCX8i2286L/5dfevH/5pvYiUQkB6jwF7rd\ndvM9BaZO9bHOP/4YO5GIRJZU4TezzmY208zmmNkaPYJmtr+ZfW1mUxK/Lq30Z/PM7IPE7RPTGV6S\n9Lvf+bUML70EvXurx1+kyFXZ1WNmJcDtwMHAAmCCmY0NIXz4i7u+EULouo6n6RRCWFqzqFIjp53m\nPf5XXOFLQJdfHjuRiESSTDtnB2BOCGEugJmNBroBvyz8kusuv9x7/P/yF+/xP+202IlEJIJklnqa\nAJ9U+n5B4rZf2tvM3jez581sp0q3B+AfZjbJzHqv60XMrLeZTTSziUuWLEkqvKTIzJd8Dj7Yl3zG\njYudSEQiSNfJ3clAsxDCLsCtwFOV/mzfEEI7oAtwtpl1XNsThBCGhxDKQghljRo1SlMsWUOdOn6y\nt21bP9k7ZUrsRCKSZckU/oXAtpW+b5q47X9CCN+EEJYlvn4OqGNmDRPfL0z8vhh4El86kpg22QSe\nfRY228zbPOfPj51IRLIomcI/AWhpZs3NrC7QExhb+Q5mtrWZWeLrDonn/dzMGpjZxonbGwCHALqM\nNBc0aeKbuHz3nRf/r76KnUhEsqTKwh9CKAfOAcYB04FHQwjTzKyPmfVJ3K07MNXM3gNuAXqGEALQ\nGPhn4vZ/A8+GEF7IxA8i1dC2rY92mDXLRzusXBk7kYhkgYUc7OkuKysLEyeq5T9rHnoITjrJJ3k+\n+GBxjbIWKRBmNimEUJbMfQtnOqdU34kn+jr/xRd7m+c118ROJCIZpMIvbvBgv8BryBC/wKtPn6of\nIyJ5SYVfnBncdhssXAhnn+0nfw8/PHYqEckALebKz2rX9k3b27f3TdsnTIidSEQyQIVfVteggW/Y\n3rgxdO0Kc+fGTiQiaabCL2tq3Nh7/MvLoUsX+Pzz2IlEJI1U+GXtWrWCsWP9hO8RR8D338dOJCJp\nosIv67bPPjByJLz1Fpx8Mvz0U+xEIpIGKvyyft27ww03wOOPw403xk4jImmgwi9V69fPd/EaOhRW\nrIidRkRqSIVfknPhhbB4MYwYETuJiNSQCr8kZ//9oazMj/q11i+S11T4JTlmMGgQzJ4NY8bETiMi\nNaDCL8k7+mho0QKuvRZycKqriCRHhV+SV1ICAwbAv/8Nb7wRO42IVJMKv6Tm1FOhUSO47rrYSUSk\nmlT4JTX16sG55/qevVO1i6ZIPlLhl9SddRbUrw/XXx87iYhUgwq/pG6LLeCMM2DUKPjkk9hpRCRF\nKvxSPf36eWfPsGGxk4hIilT4pXq22843axk+HL78MnYaEUmBCr9U38CBsGwZ3HVX7CQikgIVfqm+\nXXf14W0336zhbSJ5RIVfambQIFi0CB58MHYSEUmSCr/UTKdOGt4mkmdU+KVmKoa3zZql4W0ieUKF\nX2pOw9tE8ooKv9SchreJ5BUVfkkPDW8TyRsq/JIe9epB374a3iaSB1T4JX0qhrcNHRo7iYishwq/\npM
+WW8Lpp8NDD8GCBbHTiMg6qPBLeml4W+pefRU+/zx2CikiKvySXqWlPrzt7rs1vC0Z48bBAQf4\nr6++ip1GikRShd/MOpvZTDObY2aD1/Ln+5vZ12Y2JfHr0mQfKwVIw9uS8913cOaZ0LQpTJ8ORxwB\n338fO5UUgSoLv5mVALcDXYA2wPFm1mYtd30jhNAu8euKFB8rhUTD25Jz1VXw0UcwYgSMHAn//Kd/\nWiovj51MClwyR/wdgDkhhLkhhB+A0UC3JJ+/Jo+VfKbhbes3bZpvXXnKKT7vqEcPuO02GDvWdzfT\nFdCSQckU/iZA5f31FiRu+6W9zex9M3vezHZK8bFSaDp1gt131/C2tVm1Cv74R9h009VbX886Cy6/\nHO6/HwZrVVQyJ10ndycDzUIIuwC3Ak+l+gRm1tvMJprZxCVLlqQplkRTeXjb2LGx0+SWv/0N3nzT\ni37Dhqv/2aWX+hvAddfpegjJmGQK/0Jg20rfN03c9j8hhG9CCMsSXz8H1DGzhsk8ttJzDA8hlIUQ\nyho1apTCjyA5S8Pb1rRokb8h7refL/P8khnccosv/QwcCA88kP2MUvCSKfwTgJZm1tzM6gI9gdUO\n4cxsazOzxNcdEs/7eTKPlQJWuzb07w/vvOMnLgUuuMC7ee66y4v82pSU+Anfgw6CP/wBnn46uxml\n4FVZ+EMI5cA5wDhgOvBoCGGamfUxsz6Ju3UHpprZe8AtQM/g1vrYTPwgkqN69dLwtgovvgijRsFF\nF0Hr1uu/7wYbwBNPQPv2fvSvN05JIws5+BG8rKwsTJw4MXYMSZcrr/S16w8+gLZtY6eJ4/vv/Wcv\nKYH334cNN0zucUuWwG9/C599Bq+/DrvsktmckrfMbFIIoSyZ++rKXck8DW+Dq6+GuXN9iSfZog/+\naWncONhoI+jc2fv+RWpIhV8yr9iHt02b5ktdv/+9j2ZI1XbbefFfsQIOOQQWL05/RikqKvySHcU6\nvG3VKujTBzbeuGafeHbayfc6WLjQj/y/+SZ9GaXoqPBLdpSWwnHH+fC2YhpGdu+9fmJ26FBftqmJ\nvfaCxx/3cyVHHqlxGFJtKvySPcU2vG3RIv+ZO3b0rSnToUsXv7L31VfhxBN1VbRUiwq/ZE+7dr5G\nXSzD2/r3h+XL/VPOunr2q+PEE33J7Ikn/MR5DnbmSW5T4ZfsGjTIWxNHjoydJLNeeslPZifTs18d\n550HF18Mw4d7q6xICtTHL9kVAuyxB3z7LXz4ofe1F5rvv/d+e7PUevZTFQL07g333OOfos49NzOv\nI3lBffySu4pheNtf/wpz5sCdd2au6IP/Xd55Jxx1lH8CeOihzL2WFBQVfsm+Qh7e9uGH/nOdfDIc\neGDmX692bR8Dsf/+fgL5hRcy/5qS91T4JfsKdXhbxZz9jTeGG27I3utuuCGMGQM77wzHHANvv529\n15a8pMIvcZx6qs+iL6Thbffd529k119f8579VG2yCTz/PGyzDRx2mH/yEFkHFX6Jo3596NsXnnnG\nRxrku8WLf+7Z79UrTobGjX0CaN26vufx/PlxckjOU+GXeM4+u3CGt/Xv//PFaens2U9VixY+1+fb\nb/2aiaVL42WRnKXCL/FsuaVvNJLvw9teftmvS7jwQthxx9hpvJX06afh44/h0EP9DUmkEhV+ieuC\nC/yk6M03x05SPStW+BC2X//aL6jKFb/9LTzyCEye7F1UP/wQO5HkEBV+iSvfh7dV7tmvVy92mtUd\ncYRf3PXSSz4SetWq2IkkR6jwS3wDB/qadL4Nb5s+HYYMgZNO8v1xc9Gpp3rn1COP+JW9hXbdhFSL\nCr/El4/D2yp69jfaKLs9+9UxcCAMGAC33+7bYErRU+GX3JBvw9vuvx/eeMN79rfaKnaaql17LZxy\nClx2mS9LSVHTkDbJDSFAWZl3oEyfDrVy+JhkyRKfuLnTTjB+fG5n
rezHH/1E77PP+tLPscfGTiRp\npCFtkn/yaXjbgAE/n5PIl6IPUKeOF/x99vGZ/v/4R+xEEkke/auVgnfMMdC8eW4Pb3vlFRgxwt+k\n2rSJnSZ19ev7G2vr1j7VU5+si5IKv+SOiuFtb78Nb74ZO82aKnr2t98eLrkkdprq23xzn+LZsKFv\n5ThzZuxEkmUq/JJbevXygnTttbGTrOmaa2D27Nzs2U/Vr37lc33MvKNq4cLYiSSLVPglt+Tq8LYZ\nM7zwn3giHHxw7DTp0bKlH/l/+aUPdfvii9iJJEtU+CX35NrwthB+7tm/8cbYadKrfXuf5T97NnTt\nCt99FzuRZIEKv+SeXBvedv/98PrrfgVsPvTsp6pTJ3j4Yd8Yp3t3b/uUgqbCL7kpV4a3LV3qV77u\nuy+cdlrcLJl09NF+7uL55/3n1FyfgqbCL7mptBR69Ig/vG3AAPj66/zr2a+O3r3hqqv86un+/XO3\npVZqrMD/JUteqxjedvfdcV7/1VfhgQe8Z3+nneJkyLaLL/ZhbsOG5WZnlaSFRjZIbjvkEPjgA5g3\nDzbYIHuvu2IF7LorlJfD1Kn5376ZilWr4OSTYdQo+L//g9NPj51IkqCRDVI4Yg1vGzLEx0cUQs9+\nqmrV8o3jO3f2bqYnn4ydSNJMR/yS2yqGty1fDh9+mJ119hkz/Gi/e3fvLCpWy5fDgQfClCne77//\n/rETyXroiF8KR8XwtpkzszO8LQQfy1C/fuH17KeqQQOf5Nmihe/m9e67sRNJmiRV+M2ss5nNNLM5\nZjZ4Pffbw8zKzax7pdvmmdkHZjbFzHQYL6nL5vC2ESPgtde8Z79x48y+Vj7YcksYNw4228yXfubM\niZ1I0qDKwm9mJcDtQBegDXC8ma0xljBxv2uBF9fyNJ1CCO2S/RgisppsDW9butRfZ599/AIycdtu\n63N9fvrJT7Z/+mnsRFJDyRzxdwDmhBDmhhB+AEYD3dZyv77A48DiNOYTcb16+dHndddl7jUGDvSe\n/bvvLvye/VS1bg3PPQeLF/uVvuPHx04kNZDMv+4mwCeVvl+QuO1/zKwJcBSwtj3dAvAPM5tkZr3X\n9SJm1tvMJprZxCVLliQRS4pKxfC2p5/2k7zpNn68j2YYOLB4evZT1aGDD89bscKLf/fu8NFHsVNJ\nNaTrsGYYcGEIYW3Xee8bQmiHLxWdbWYd1/YEIYThIYSyEEJZo0aN0hRLCsrZZ3trZbqHt61c6W2L\nLVrAn/6U3ucuNPvv71tjXnmlj3fYcUffm2DZstjJJAXJFP6FwLaVvm+auK2yMmC0mc0DugN3mNmR\nACGEhYnfFwNP4ktHIqlr2NDX3keOTO/8+Iqe/Tvu8E8Wsn716vkb5KxZvm/vX/8KO+zgJ8Y14ycv\nJFP4JwAtzay5mdUFegKr9dWFEJqHEEpDCKXAY8BZIYSnzKyBmW0MYGYNgEOAqWn9CaS49O+f3uFt\nM2d64Tr+eJ9JL8lr0gQefBDeestPAJ9yCuy1l38vOa3Kwh9CKAfOAcYB04FHQwjTzKyPmfWp4uGN\ngX+a2XvAv4FnQwgv1DS0FLGK4W133VXz4W0hwJlnqme/pvbc04v9iBE+RnvvveGkk3JjpLasla7c\nlfzz7ru+gciQIXDhhdV/nhEj/Cj17rt9MqXU3LJl/t9l6FAoKYHBg33CabGNvYgglSt3VfglP9V0\neNvSpd6i2KoVvPGG2jfTbd48v+L673+HZs28DbdHD78SWzJCIxuk8NV0eNugQcUzZz+G0lJ49FFv\nk91iC+jZEzp2hMmTYycTVPglXx14IOy2G1x/feqdJOPH+/TJAQNg550zEk8S9tsPJk6E4cP9RHpZ\nmY95XrQodrKipsIv+any8Lann07+cStX+hC25s3hz3/OXD75WUkJnHGGb+h+wQV+bqVlS3/TXrky\ndrqipMIv+at7d19SSGWMw3XX
+ZuFevazb9NN/aTvtGl+IVjFzmZjxmibxyxT4Zf8VTG87V//Sm54\n26xZcPXVvt7cuXPm88natWzpI7bHjfMT80ce6Sfrp+oSn2xR4Zf8dtppPrytqv1hK3r2N9wQbrop\nO9lk/Q45xDd5ueUWmDQJ2rWDc86Bzz+PnazgqfBLfkt2eNvIkfDKK95jvvXW2csn61enjv/3mz3b\nz73cdZd/Irj1Vvjxx9jpCpYKv+S/qoa3ff65n1Tcc09dqJWrttwSbrvNPwHsvjuce65vfzluXOxk\nBUmFX/JfVcPbBg3y8Q7Dh6tnP9e1beubvowZ40f8nTvD4Yf7+RlJG/1fIIXhggt8h6hfDm97/XW4\n914/Caye/fxg5nv8Tp3qXVivveZvCAMG+EV3UmMq/FIYmjf/eXhbRXGomLNfWgqXXho1nlTDBhv4\nxjizZ8Pvf++D9Fq29E9uP/0UO11eU+GXwjFwIHz7rQ9dA79AaMYM9eznu8aN4Z57/ArgVq38zXz3\n3f2TgFSLCr8Ujvbt4aCDYNgwv0joqqvguOOgS5fYySQd2rf3pbtHHoEvv/SLwI491gfCSUpU+KWw\nXHghfPqpFwX17BceM1/SmzEDrrjCN4Bv3dp3BNP2j0lT4ZfCUjG8belSuOYa2Gab2IkkE+rV81lL\nM2f6Uf/VV/sy0IMPavvHJKjwS2Ex8xO8gwf7WrAUtqZNf97+sWlTPwm8117w9tuxk+U0FX4pPB06\n+NG+evaLR8X2jw88AJ984sX/5JPXfl2HqPCLSIGoVcuP+GfNgosv9t2/dtgBnnoqdrKco8IvIoVl\no418zX/6dL/w64QTYMKE2Klyigq/iBSm5s19eN/WW/uVwPPnx06UM1T4RaRwbbUVPPMMfP89dO0K\n33wTO1FOUOEXkcLWpg089piP7e7ZE8rLYyeKToVfRArfQQfBnXfC889Dv36x00RXO3YAEZGsOOMM\n7/gZOtS7ffr2jZ0oGhV+ESkeQ4bAnDlw/vnQogUcdljsRFFoqUdEikdJiW/Ys9tuvt7/3nuxE0Wh\nwi8ixaVBAxg7FjbbzDt9/vvf2ImyToVfRIrPr37lbZ5ffeU9/suXx06UVSr8IlKcdt0VRo+Gd9+F\nk04qqqmeKvwiUrwOO8w37nnqKZ/oWiTU1SMixa1vX2/zvP5639P3jDNiJ8o4FX4RkZtugv/8B848\n02f8HHRQ7EQZpaUeEZHatX29v00b6N7dxzsUsKQKv5l1NrOZZjbHzNa5EGZme5hZuZl1T/WxIiJR\nbbKJd/rUq+dtnosXx06UMVUWfjMrAW4HugBtgOPNrM067nct8GKqjxURyQnNmnmP/2efwZFHwooV\nsRNlRDJH/B2AOSGEuSGEH4DRQLe13K8v8DiwuBqPFRHJDXvs4Vf3vvUWnHpqQbZ5JlP4mwCfVPp+\nQeK2/zGzJsBRwJ2pPlZEJOccfTRcey088ghcfnnsNGmXrq6eYcCFIYRVZlatJzCz3kBvgGbNmqUp\nlohINQ0c6G2eV14Jv/617+dbIJIp/AuBbSt93zRxW2VlwOhE0W8IHGpm5Uk+FoAQwnBgOEBZWVlI\nJryISMaY+Qz/jz6C00+H0lLo2DF2qrRIZqlnAtDSzJqbWV2gJzC28h1CCM1DCKUhhFLgMeCsEMJT\nyTxWRCRn1anju3dtvz0cdRTMnh07UVpUWfhDCOXAOcA4YDrwaAhhmpn1MbM+1XlszWOLiGTJ5pt7\nm2etWj7i4YsvYieqMQsh91ZVysrKwsSJE2PHEBH52ZtvwgEHwF57wYsvQt26sROtxswmhRDKkrmv\nrtwVEUnGPvvAfffBa69B796QgwfNydKsHhGRZJ1wgm/deNllvm/vxRfHTlQtKvwiIqn485/9JO8l\nl3ibZ48esROlTEs9IiKpMIN77oF99/Xe/rffjp0oZSr8IiKp2mADePJJaNIEunWDefNiJ0qJCr
+I\nSHU0bAjPPgs//OBtnl9/HTtR0lT4RUSqq3VreOIJH+1w7LHw44+xEyVFhV9EpCY6dYLhw+Gll3wb\nxzxo81RXj4hITfXq5Uf9Q4ZAq1bQr1/sROulwi8ikg5XX+09/v37Q4sWftI3R2mpR0QkHWrVghEj\nfCOXE06AyZNjJ1onFX4RkXSpVw/GjPGOn8MPhwULYidaKxV+EZF02nprn+b57bde/Jcti51oDSr8\nIiLptvPO8Oij8P77vuzz00+xE61GhV9EJBM6d4Zbb4Wnn4YBA2KnWY26ekREMuWss3yg27Bh0LKl\nf58DVPhFRDJp6FBv8zz3XG/z7Nw5diIt9YiIZFRJCTz8sK/79+gBU6fGTqTCLyKScRtt5Gv9G23k\nA90++yxqHBV+EZFsaNrUi//SpX5V73ffRYuiwi8iki277w6jRsGECXDKKbBqVZQYKvwiItnUrZuf\n8H3sMfjTn6JEUFePiEi29evn0zyvucbbPHv1yurLq/CLiGSbmV/cNXcu9O4NpaU+1z9LtNQjIhJD\nnTrw97/DDjvA0UfDzJlZe2kVfhGRWDbd1PftrVvX2zyXLs3Ky6rwi4jEVFrqo5wXLICjjoKVKzP+\nkir8IiKx7bmnb+LSurVv6JJhOrkrIpILevTwX1mgI34RkSKjwi8iUmRU+EVEiowKv4hIkVHhFxEp\nMir8IiJFRoVfRKTIqPCLiBQZCyHEzrAGM1sCfFzNhzcEsjPwoubyKSvkV958ygr5lTefskJ+5a1J\n1u1CCI2SuWNOFv6aMLOJIYSy2DmSkU9ZIb/y5lNWyK+8+ZQV8itvtrJqqUdEpMio8IuIFJlCLPzD\nYwdIQT5lhfzKm09ZIb/y5lNWyK+8WclacGv8IiKyfoV4xC8iIutREIXfzDY0s3+b2XtmNs3M/hI7\nUzLMrMTM3jWzZ2JnWR8zm2dmH5jZFDObGDtPVcxsMzN7zMxmmNl0M9srdqa1MbNWib/Til/fmNn5\nsXOtj5n1S/w/NtXMHjazDWNnWhczOy+Rc1ou/r2a2b1mttjMpla6bQsze8nMZid+3zwTr10QhR9Y\nCRwQQtgVaAd0NrM9I2dKxnnA9NghktQphNAuT9ribgZeCCG0BnYlR/+OQwgzE3+n7YDdge+AJyPH\nWiczawKcC5SFENoCJUDPuKnWzszaAmcAHfB/A13N7NdxU63hfqDzL24bDLwcQmgJvJz4Pu0KovAH\ntyzxbZ3Er5w+eWFmTYHDgHtiZykkZrYp0BH4G0AI4YcQwldxUyXlQOA/IYTqXriYLbWBemZWG6gP\n/DdynnX7Ofq1AAACh0lEQVTZEXgnhPBdCKEceA04OnKm1YQQXge++MXN3YAHEl8/AByZidcuiMIP\n/1s2mQIsBl4KIbwTO1MVhgGDgFWxgyQhAP8ws0lm1jt2mCo0B5YA9yWW0e4xswaxQyWhJ/Bw7BDr\nE0JYCAwF5gOfAl+HEF6Mm2qdpgK/NbMtzaw+cCiwbeRMyWgcQvg08fVnQONMvEjBFP4Qwk+Jj8xN\ngQ6Jj3o5ycy6AotDCJNiZ0nSvom/2y7A2WbWMXag9agNtAfuDCHsBiwnQx+X08XM6gJHAH+PnWV9\nEuvN3fA3118BDczspLip1i6EMB24FngReAGYAvwUNVSKgrdcZmTlomAKf4XEx/pXWXPtLJfsAxxh\nZvOA0cABZjYybqR1SxzpEUJYjK9Bd4ibaL0WAAsqfeJ7DH8jyGVdgMkhhEWxg1ThIOCjEMKSEMKP\nwBPA3pEzrVMI4W8hhN1DCB2BL4FZsTMlYZGZbQOQ+H1xJl6kIAq/mTUys80SX9cDDgZmxE21biGE\ni0IITUMIpfhH/FdCCDl55GRmDcxs44qvgUPwj9E5KYTwGfCJmbVK3HQg8GHESMk4nhxf5kmYD+xp\nZvXNzPC/25w8cQ5gZlslfm+Gr++PipsoKWOBUxJfnwKMyc
SL1M7Ek0awDfCAmZXgb2aPhhByukUy\njzQGnvT/z6kNjAohvBA3UpX6Ag8lllDmAr0i51mnxJvpwcAfY2epSgjhHTN7DJgMlAPvkttXxT5u\nZlsCPwJn59pJfjN7GNgfaGhmC4DLgCHAo2b2B3xCcY+MvLau3BURKS4FsdQjIiLJU+EXESkyKvwi\nIkVGhV9EpMio8IuIFBkVfhGRIqPCLyJSZFT4RUSKzP8DmMjaC3oJmBcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xdc216a0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(Ks, np.array(CH_scores2), 'r-')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 当n_clusters=4时CH_scores评分最高"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0xc3997be0>]"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAD8CAYAAAC2PJlnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl4FGW2x/HvSbMjGISgbAIqqCwaISAu6IAgi0hYlFW2\nQZBh0cE7V/CqM46CgjsogogoUQQBWcKAIiDIZkjCJptoxIWELaJssiY5948unBAg3YQk1d05n+fp\np7ur3rfq1xFz8r5VXSWqijHGGJMTYW4HMMYYE7ysiBhjjMkxKyLGGGNyzIqIMcaYHLMiYowxJses\niBhjjMkxKyLGGGNyzIqIMcaYHLMiYowxJscK+dNIRFoCYwAPMElVR2VZL8761sAxoLeqrs+ur4i8\nDNwPnAJ+APqo6kFn3ZNAXyAdeFRVFznL6wMfAMWBhcBj6uMr9+XKldNq1ar58zGNMcY41q1b96uq\nRvhqJ74ueyIiHuA7oDmQDCQAXVV1W6Y2rYEheIvIrcAYVb01u74ici/wpaqmichoAFUdJiK1gGlA\nQ6AisASoqarpIhIPPAqsxVtExqrqZ9nlj4qK0sTERF8/B2OMMZmIyDpVjfLVzp/prIZAkqruVNVT\nwHQgOkubaCBGveKAcBGpkF1fVf1CVdOc/nFA5Uzbmq6qJ1X1RyAJaOhsr7SqxjmjjxignR/5jTHG\n5BF/ikglYFem98nOMn/a+NMX4K/AmRFFdttK9mNbxhhj8onrB9ZF5CkgDZiai9vsLyKJIpKYmpqa\nW5s1xhiThT9FJAWokul9ZWeZP22y7SsivYE2QPdMB8iz21bl8yw/h6pOVNUoVY2KiPB5XMgYY0wO\n+VNEEoAaIlJdRIoAXYDYLG1igZ7i1Qg4pKp7suvrnLX1BNBWVY9l2VYXESkqItWBGkC8s73DItLI\nORusJzAvpx/cGGPMpfN5iq9z9tRgYBHe03Qnq+pWERngrJ+A90yp1ngPgh8D+mTX19n0W0BRYLG3\nJhCnqgOcbc8AtuGd5hqkqulOn4H89xTfz/jvcRRjjDEu8HmKb7CzU3yNMebi5eYpvsYYU6ClZ6Qz\nZeMUtqVu8924gPHrG+vGGFNQ/XzwZ3rO7cmKn1dQplgZFvdYTP2K9d2OFTBsJGKMMRfw8eaPuXnC\nzazfs57X7n2N0kVL0+zDZiSkJLgdLWBYETHGmCwOnjhIt0+70X12d2qXr82mAZsYettQvur9FWWK\nlaH5h81Zm7zW7ZgBwYqIMcZk8tVPX3HT+JuYsXUGzzd5nq96f8U1Za4BoGp4VZb3Xk7ZEmW596N7\n+XrX1y6ndZ8VEWOMAU6ln2L4kuE0mdKEYoWKsabvGp6+62kKhZ196Pjqy6/mq95fUb5keVp81II1\nu9a4lDgwWBExxhR421O3c+ukWxm9ejT96vVj/SPraVip4QXbVy5dmeW9lnPVZVfR4qMWrPplVT6m\nDSxWRIwxBZaqMi5+HPUm1iP5cDLzuszjnfvf4bIil/nsW6l0JZb3Xk6lUpVo+VFLVvy8Ih8SBx4r\nIsaYAmnv0b3c9/F9DP5sME2qNWHz3zbT9vq2F7WNiqUqsqzXMqpcXoVWU1ux/KfleRM2gFkRMcYU\nOPO+nUfd8XVZ9tMyxrUex4JuC7jqsqtytK0KpSqwvNdyqoVXo/XU1izduTSX0wY2KyLGmALjj1N/\n0H9+f9p90o4qpauwvv96BjYYiHP9vhy78rIrWdZrGddecS1tprVh8Q+Lcylx4LMiYowpEOJT4rnl\nnVuYtH4Sw+4YRtzDcdwYcWOubb98yfJ82fNLapatyf3T7mdR0qJc23YgsyJijAlpaRlpjFgxgtvf\nu50TaSdY1msZo5qNooinSK7vK6JkBEt7LuXGiBuJnh7N50mf5/o+Ao0VEWNMyNr5+07u/uBunln2\nDJ3rdOabv33D3dXuztN9litRjqU9l1
IrohbR06NZ8N2CPN2f26yIGGNCjqoyZeMUIidEsnX/VqZ2\nmMrUDlMJLxaeL/u/ovgVLO25lLrl69L+k/bM3zE/X/brBisixpiQ8tvx3+g0qxO95/Xmlgq3sGnA\nJrrV7ZbvOcoUL8OSnkuIvCqSjjM6Mu/b0LwRqxURY0zIWLJzCXXH12Xet/MY3Ww0X/b8kqrhVV3L\nE14snMU9FlOvQj0emPkAs7fPdi1LXvGriIhISxHZISJJIjL8POtFRMY6678RkXq++orIgyKyVUQy\nRCQq0/LuIrIx0yNDRCKddcudbZ1ZV/7SPr4xJhScSDvB44sep/mHzSldtDRxD8fxxB1P4AnzuB2N\ny4tdzhc9vqBBxQZ0mtmJWdtmuR0pV/ksIiLiAcYBrYBaQFcRqZWlWSughvPoD4z3o+8WoANw1rUC\nVHWqqkaqaiTQA/hRVTdmatL9zHpV3X9Rn9YYE3I279tMw3cb8nrc6wxqMIh1/ddRr0I93x3zUemi\npVn00CIaVW5El1ld+GTLJ25HyjX+jEQaAkmqulNVTwHTgegsbaKBGPWKA8JFpEJ2fVV1u6ru8LHv\nrk4fY4w5S4Zm8EbcGzR4twH7/9jPgm4LeKv1W5QoXMLtaOdVqmgpPuv+GbdXuZ1us7sxbfM0tyPl\nCn+KSCVgV6b3yc4yf9r40zc7nYGsP+kpzlTWM3KpXzM1xgSllMMptPioBUMXDaXFdS3Y/LfNtK7R\n2u1YPpUqWoqF3RfS+OrGPDTnIT765iO3I12ygD2wLiK3AsdUdUumxd1VtTbQ2Hn0uEDf/iKSKCKJ\nqamp+ZDWGJNfPt32KTdNuIk1u9Ywsc1E5naeS0TJCLdj+e2yIpexoNsC7q56Nz3n9CRmU4zbkS6J\nP0UkBaiS6X1lZ5k/bfzpeyFdyDIKUdUU5/kI8DHe6bJzqOpEVY1S1aiIiOD5x2WMubAjJ4/QZ14f\nHpj5ANeWuZYNj2ygX/1+l3zdKzeULFKS/3T7D02rN6X33N68v+F9tyPlmD9FJAGoISLVRaQI3l/u\nsVnaxAI9nbO0GgGHVHWPn33PISJhQCcyHQ8RkUIiUs55XRhog/fgvDEmxK3ZtYbIdyKJ2RTDM3c9\nw+q/rqZm2Zpux7okJQqXYH7X+TS7phl9Y/vy3vr33I6UI4V8NVDVNBEZDCwCPMBkVd0qIgOc9ROA\nhUBrIAk4BvTJri+AiLQH3gQigAUislFVWzi7vQvYpao7M0UpCixyCogHWAK8e0mf3hgT0E6nn+b5\nFc8zcuVIql5elRW9V3DH1Xe4HSvXFC9cnHld5tH+k/Y8PP9hMjSDfvX7uR3rooiqup0hT0VFRWli\nYqLbMYwxF+n7A9/z0JyHiE+Jp3dkb8a0HEPpoqXdjpUnTqSdoOOMjiz8fiHj7xvPgKgBbkdCRNap\napSvdj5HIsYYk59Ulfc2vMffP/87RTxFmPngTB6o9YDbsfJUsULFmN1pNg/MfIC/Lfgb6RnpDGo4\nyO1YfrEiYowJGKl/pNJvfj/m7ZjHPdXvYUq7KVQqfTHfCgheRQsVZdaDs+g0qxODPxtMhmYw5NYh\nbsfyyYqIMSYgfJ70OX3m9eG347/x2r2v8VijxwiTgP0WQp4oWqgoMx+cSZdZXXj080dJ13T+3ujv\nbsfKVsH6L2SMCTjHTx9nyMIhtJrainIlypHQL4Ghtw0tcAXkjCKeInzywCd0vLEjQxcN5dU1r7od\nKVs2EjHGuGbDng10n92d7b9uZ2ijobxwzwsUK1TM7ViuK+wpzLSO0+g+uzv/WPwP0jWdJ+54wu1Y\n52VFxBjjikVJi7h/2v1ElIzgi4e+oPm1zd2OFFAKewrzcceP8YR5GLZkGOkZ6TzZ+Em3Y53Diogx\nJt+pKsOWDKNaeDW+7vs1ZUuUdTtSQCoUVogP239ImITxf1/+H+maztN3Pe12rLNYETHG5LsF3y9g\n07
5NfBD9gRUQHwqFFSKmXQwe8fDMsmdIz0jnX3/5l9ux/mRFxBiTr1SVEStGUC28miu3rQ1GnjAP\n70e/T5iE8exXz5KhGTz7l2cD4rphVkSMMfnqyx+/ZG3KWsbfN57CnsJuxwkanjAP77V9jzAJ47kV\nz5GhGTzX5DnXC4kVEWNMvhq5ciQVS1Wkd2Rvt6MEHU+Yh0ltJ+ERDyNWjiBd0xnZdKSrhcSKiDEm\n36zZtYZlPy3jtXtfs1N5cyhMwnjn/nfwhHl4cdWLpGekM6rZKNcKiRURY0y+GblyJOVKlKN//f5u\nRwlqYRLG2/e9TZiE8dKal0jXdF5u/rIrhcSKiDEmX6zfs56F3y9kZNORlCxS0u04QS9MwhjXehwe\n8fDq16+SnpHOay1ey/dCYkXEGJMvXlj5ApcXvZxBDYLj6rTBQEQY22osnjAPb6x9gwzN4I2Wb+Rr\nIbEiYozJc9tSt/Hp9k95uvHTXF7scrfjhBQR4fUWr+MRD6/FvUa6pvNmqzfzrZBYETHG5LkXV71I\nicIleKzRY25HCUkiwiv3voInzMPLa14mPSOdcfeNy5eLWPq1BxFpKSI7RCRJRIafZ72IyFhn/Tci\nUs9XXxF5UES2ikiGiERlWl5NRI6LyEbnMSHTuvoistnZ1lhx+wRpY4xPP/z2Ax9v/pi/Rf2NciXK\nuR0nZIkIo5uNZvgdw5mwbgID/jOADM3I8/36LCIi4gHGAa2AWkBXEamVpVkroIbz6A+M96PvFqAD\nsOI8u/1BVSOdR+b7RI4H+mXaV0t/PqQxxj2jV4+mcFhh/ue2/3E7SsgTEV645wWeavwUc7+dS8rh\nlDzfpz8jkYZAkqruVNVTwHQgOkubaCBGveKAcBGpkF1fVd2uqjv8Depsr7Sqxqn3xvAxQDt/+xtj\n8l/y4WQ+2PgBfW/pS4VSFdyOUyCICM83eZ7Nf9tMlcur5Pn+/CkilYBdmd4nO8v8aeNP3/Op7kxl\nfSUijTPtIzkH2zLGuOTl1S+jaMDeCyNUiQhXXnZlvuwrEA+s7wGuVtUDIlIfmCsitS9mAyLSH++0\nGldffXUeRDTG+LLv6D7eXf8uPW7qQdXwqm7HMXnEn5FICpB5TFTZWeZPG3/6nkVVT6rqAef1OuAH\noKbTr7I/21LViaoapapRERER2e3OGJNHXo97nZPpJxl+5znn4pgQ4k8RSQBqiEh1ESkCdAFis7SJ\nBXo6Z2k1Ag6p6h4/+55FRCKcA/KIyDV4D6DvdLZ3WEQaOWdl9QTm+f9RjTH55bfjvzEuYRydanei\nZtmabscxecjndJaqponIYGAR4AEmq+pWERngrJ8ALARaA0nAMaBPdn0BRKQ98CYQASwQkY2q2gK4\nC3hORE4DGcAAVf3NiTMQ+AAoDnzmPIwxAebNtW9y9NRR/u/O/3M7islj4j3RKXRFRUVpYmKi2zGM\nKTCOnDxC1TeqclfVu5jbZa7bcUwOicg6VY3y1S7vv85ojClQxieO5/cTv/NU46fcjmLygRURY0yu\nOX76OK9+/SrNr2lOg0oN3I5j8oEVEWNMrpm0fhL7/9jP03c97XYUk0+siBhjcsWp9FO8tOYl7rz6\nTu6qepfbcUw+CcQvGxpjglDMphiSDycz6f5Jbkcx+chGIsaYS5aWkcaoVaOoX6E+9157r9txTD6y\nkYgx5pLN2DqDH37/gTmd57hyn2/jHhuJGGMuSYZmMHLlSGpH1Kbt9W3djmPymY1EjDGXZN6389iW\nuo2PO3ycL3fSM4HF/osbY3JMVRmxcgTXXXEdnWp3cjuOcYGNRIwxObboh0Ws37Oe99q+hyfM43Yc\n4wIbiRhjckRVGbFiBFVKV+Ghmx5yO45xiY1EjDE5suLnFazetZq3Wr1FEU8Rt+MYl9hIxBiTIyNW\njuDKklfy11v+6nYU4yIrIsaYi7Y2eS1Ldi7hH7f/g+KFi7sdx7jI
iogx5qKNXDmSK4pfwYCoAW5H\nMS6zImKMuSib9m5i/nfzeezWx7isyGVuxzEu86uIiEhLEdkhIkkiMvw860VExjrrvxGRer76isiD\nIrJVRDJEJCrT8uYisk5ENjvPTTOtW+5sa6PzKJ/zj26MyYkXVr1AqSKlGNJwiNtRTADweXaWiHiA\ncUBzIBlIEJFYVd2WqVkroIbzuBUYD9zqo+8WoAPwTpZd/grcr6q7RaQO3vuzV8q0vruq2v1ujXHB\njl93MHPrTIbdMYwyxcu4HccEAH9O8W0IJKnqTgARmQ5EA5mLSDQQo94btseJSLiIVACqXaivqm53\nlp21M1XdkOntVqC4iBRV1ZM5+HzGmFw0avUoihUqxtDbhrodxQQIf6azKgG7Mr1P5uyRQXZt/Omb\nnY7A+iwFZIozlfWMXOByoSLSX0QSRSQxNTX1InZnjLmQnw7+xIebPqR//f6UL2kzycYrYA+si0ht\nYDTwSKbF3VW1NtDYefQ4X19VnaiqUaoaFRERkfdhjSkAXlr9Ep4wD/+4/R9uRzEBxJ8ikgJUyfS+\nsrPMnzb+9D2HiFQG5gA9VfWHM8tVNcV5PgJ8jHeqzRiTx3Yf2c17G96j9829qVy6sttxTADxp4gk\nADVEpLqIFAG6ALFZ2sQCPZ2ztBoBh1R1j599zyIi4cACYLiqrs60vJCIlHNeFwba4D04b4zJY6+u\neZX0jHSG3TnM7SgmwPgsIqqaBgzGe5bUdmCGqm4VkQEicuabRguBnUAS8C4wMLu+ACLSXkSSgduA\nBSKyyNnWYOA64J9ZTuUtCiwSkW+AjXhHNO9e8k/AGJOtX4/9yoR1E+hWtxvXlLnG7TgmwIj3hKrQ\nFRUVpYmJdkawMTn19JdP88LKF9g6cCs3RtzodhyTT0RknapG+WoXsAfWjTHuO3jiIG/Gv0nHWh2t\ngJjzsiJijLmgcfHjOHzyME81fsrtKCZAWRExxpzX0VNHeT3ude6rcR+RV0W6HccEKCsixpjzmrhu\nIgeOH7BRiMmWFRFjzDlOpJ3g5TUv07R6U26rcpvbcUwAs9vjGmPO8f6G99l7dC9TO0x1O4oJcDYS\nMcac5XT6aUavHs1tlW+jSbUmbscxAc5GIsaYs0zdPJWfD/3MuNbjzrnKtjFZ2UjEGPOn9Ix0Xlz1\nIpFXRdK6Rmu345ggYCMRY8yfZm2bxXcHvmPmgzNtFGL8YiMRYwwAGZrByJUjubHcjXS4sYPbcUyQ\nsJGIMQaA/3z3Hzbv30xMuxjCxP6+NP6xfynGGFSVkStHUj28Ol3rdnU7jgkiNhIxxrBk5xLiU+J5\np807FAqzXwvGfzYSMcYwcuVIKpWqRK+be7kdxQQZKyLGFHCrflnFVz9/xf/e/r8ULVTU7TgmyFgR\nMaaAG7lyJBElIuhXv5/bUUwQ8quIiEhLEdkhIkkiMvw860VExjrrvxGRer76isiDIrJVRDJEJCrL\n9p502u8QkRaZltcXkc3OurFiJ7Ibc0nW7V7H50mf8/htj1OicAm345gg5LOIiIgHGAe0AmoBXUWk\nVpZmrYAazqM/MN6PvluADsCKLPurBXQBagMtgbed7eBst1+mfbW8iM9qjMli5MqRhBcLZ2CDgW5H\nMUHKn5FIQyBJVXeq6ilgOhCdpU00EKNecUC4iFTIrq+qblfVHefZXzQwXVVPquqPQBLQ0NleaVWN\nU++N4WOAdhf/kY0xAFv3b2XOt3N4tOGjlC5a2u04Jkj5U0QqAbsyvU92lvnTxp++/u6vkvPa57ZE\npL+IJIpIYmpqqo/dGVMwvbDqBUoWLsmjtz7qdhQTxELywLqqTlTVKFWNioiIcDuOMQEn6bckpm+Z\nzsAGAylboqzbcUwQ8+dbRSlAlUzvKzvL/GlT2I++/u4vxXl9MdsyxpzHqFWjKBxWmMdve9ztKCbI\n+TMSSQBqiEh1ESmC96B3bJY2
sUBP5yytRsAhVd3jZ9+sYoEuIlJURKrjPYAe72zvsIg0cs7K6gnM\n8/eDGmO8fjn0CzGbYuhXrx9XXXaV23FMkPM5ElHVNBEZDCwCPMBkVd0qIgOc9ROAhUBrvAfBjwF9\nsusLICLtgTeBCGCBiGxU1RbOtmcA24A0YJCqpjtxBgIfAMWBz5yHMeYivLz6ZRTlf+/4X7ejmBAg\n3hOdQldUVJQmJia6HcOYgLD36F6qj6lO97rdmdR2kttxTAATkXWqGuWrXUgeWDfGnN9rX7/GqfRT\nDL/znO8MG5MjVkSMKSAOHDvA+MTxdKnTheuuuM7tOCZEWBExpoAYu3YsR08d5ck7n3Q7igkhVkSM\nKQAOnzzM2PixtL+hPXXK13E7jgkhVkSMKQDGJ4zn4ImDPNX4KbejmBBjRcSYEHfs9DFe/fpVWl7X\nkvoV67sdx4QYKyLGhLhJ6yeReizVRiEmT1gRMSaEnUw7yUurX+Kuqndx59V3uh3HhCB/rp1ljAlS\nMZtiSDmSwvvR77sdxYQoG4kYE6LSMtIYtXoUDSo2oNk1zdyOY0KUjUSMCVHTt0xn5+87eb3F69id\npE1esZGIMSEoQzN4YeUL1C1flzY127gdx4QwG4kYE4LmbJ/D9l+3M73jdMLE/lY0ecf+dRkTYlSV\nkStHUrNsTR6o9YDbcUyIs5GIMSHms6TP2LB3A+9Hv48nzON2HBPibCRiTAhRVUasGEHVy6vSvW53\nt+OYAsCvIiIiLUVkh4gkicg5NyJwbos71ln/jYjU89VXRK4QkcUi8r3zXMZZ3l1ENmZ6ZIhIpLNu\nubOtM+vKX/qPwJjQsfyn5Xyd/DXD7hhGYU9ht+OYAsBnERERDzAOaAXUArqKSK0szVrhvRd6DaA/\nMN6PvsOBpapaA1jqvEdVp6pqpKpGAj2AH1V1Y6Z9dT+zXlX35+RDGxOKDp88zFNfPkWFyyrQ55Y+\nbscxBYQ/I5GGQJKq7lTVU8B0IDpLm2ggRr3igHARqeCjbzQwxXk9BWh3nn13dfoYY7KxYc8G6k+s\nz9qUtbzU/CWKFSrmdiRTQPhTRCoBuzK9T3aW+dMmu75Xquoe5/Ve4Mrz7LszMC3LsinOVNYzYt+g\nMgWcqjI+YTy3vXcbx08fZ3mv5Tx000NuxzIFSEAcWFdVBTTzMhG5FTimqlsyLe6uqrWBxs6jx/m2\nJyL9RSRRRBJTU1PzKrYxrjp04hCdZ3Vm4MKBNK3elI0DNtK4amO3Y5kCxp8ikgJUyfS+srPMnzbZ\n9d3nTHnhPGc9vtGFLKMQVU1xno8AH+OdLjuHqk5U1ShVjYqIiMj2wxkTjNbtXke9ifWYvX02o5uN\n5j/d/kO5EuXcjmUKIH+KSAJQQ0Sqi0gRvL/cY7O0iQV6OmdpNQIOOVNV2fWNBXo5r3sB885sTETC\ngE5kOh4iIoVEpJzzujDQBsg8SjEm5Kkqb659k9sn387p9NOs6LOCJ+54wr6Vblzj88uGqpomIoOB\nRYAHmKyqW0VkgLN+ArAQaA0kAceAPtn1dTY9CpghIn2Bn/EWjTPuAnap6s5My4oCi5wC4gGWAO/m\n7GMbE3wOnjhI39i+zN4+m/tr3s/70e9TtkRZt2OZAk68hyNCV1RUlCYmJrodw5hLEp8ST+dZnUk+\nnMzoZqMZ2mioXZnX5CkRWaeqUb7a2WVPjAlgqsobcW8wbMkwKpaqyKo+q7i18q1uxzLmT1ZEjAlQ\nvx3/jT7z+hC7I5Z2N7RjctvJlClexu1YxpzFiogxASguOY7Oszqz58gexrQcw5CGQ2z6ygQkO6XD\nmACSoRm8suYVGr/fGI94WP3X1Tx666NWQEzAspGIMQHiwLED9JrbiwXfL6DjjR2Z1HYS4cXC3Y5l\nTLasiBgTAFb/spoun3Zh/x/7eavVWwxsMNBGHyYo2HSWMS7K0AxGrRrF3R/cTVFPUb7u+zWDGg
6y\nAmKCho1EjHFJ6h+p9Jzbk8+TPqdT7U68e/+7lC5a2u1YxlwUKyLGuGDFzyvo+mlXDhw7wPj7xvNI\n/Uds9GGCkk1nGZOPMjSDkStG0mRKE0oWLkncw3EMiBpgBcQELRuJGJNP9h3dR485PVi8czHd6nZj\nwn0TKFW0lNuxjLkkVkSMyQfLflxGt9ndOHjiIO/e/y59b+lrow8TEmw6y5g8lJ6Rzr+X/5tmHzYj\nvFg48Q/H83C9h62AmJBhIxFj8sjeo3vpPrs7X/74JT1u6sHb973NZUUuczuWMbnKiogxeWDpzqV0\nn92dwycPM7ntZHpH9rbRhwlJNp0VAmI2xVBmdBn6xfbjm33fuB2nQEvPSOefy/5J8w+bU7ZEWRL6\nJdDnlj5WQEzIsiIS5OJT4uk3vx8RJSKYunkqN0+4mSZTmjBn+xzSM9Ldjleg7D6ym3ti7uH5Fc/T\nO7I38Q/HU7t8bbdjGZOn/CoiItJSRHaISJKIDD/PehGRsc76b0Sknq++InKFiCwWke+d5zLO8moi\nclxENjqPCZn61BeRzc62xkoB//Nu79G9dPikAxVLVeTrvl+T/HgyLzV7iZ2/76TDjA5cO/ZaXlnz\nCr8f/93tqCFvUdIiIidEkrA7gZh2MUyOnkzJIiXdjmVM3lPVbB9472f+A3ANUATYBNTK0qY18Bkg\nQCNgra++wEvAcOf1cGC087oasOUCWeKd7Yuzv1a+8tevX19D0cm0k3rn5Du1+IjiunHPxrPWnU4/\nrbO3zda/fPAX5Vm0xMgS+sj8R3TLvi0upQ1dp9NP65NLnlSeReu8XUe3p253O5IxuQJIVB+/X1XV\nr5FIQyBJVXeq6ilgOhCdpU00EOPsOw4IF5EKPvpGA1Oc11OAdtmFcLZXWlXjnA8Y46tPKPv7539n\n1S+rmBw9mZuvuvmsdYXCCtH+xvYs67WMjY9spGudrkzZNIU64+vQLKYZsTtibaorFyQfTqbJlCa8\nuOpF+tXrR/zD8dxQ7ga3YxmTr/wpIpWAXZneJzvL/GmTXd8rVXWP83ovcGWmdtWdqayvRKRxpn0k\n+8hRIExaP4nxieN54vYn6FKnS7Ztb77qZia1ncSuobt48Z4X2XFgB9HTo6nxZg1e+/o1Dp44mE+p\nQ8vC7xcSOSGSjXs3MrXDVCbeP5HihYu7HcuYfBcQB9adkYU6b/cAV6tqJPA48LGIXNSlTUWkv4gk\nikhiampqLqd1V1xyHIMWDuLea+/lhXte8LtfuRLlGH7ncH587EdmPjiTyqUr8z9f/A+VXqvEwAUD\n2Z66PQ+DDi6oAAAQ1UlEQVRTh47T6ad5YvET3PfxfVQuXZl1/dfRrW43t2MZ4xp/ikgKUCXT+8rO\nMn/aZNd3nzNFdWaqaj+Aqp5U1QPO63V4j6nUdPpV9pEDp99EVY1S1aiIiAg/PmJw2HNkDx0+6UDl\n0pWZ1nEanjDPRW+jUFghHqj1ACv6rGB9//V0rt2ZyRsmU+vtWtz74b0s+G4BGZqRB+mD3y+HfuHu\nD+7m5TUvM6D+AOIejqNm2ZpuxzLGVf4UkQSghohUF5EiQBcgNkubWKCnc5ZWI+CQM1WVXd9YoJfz\nuhcwD0BEIkTE47y+BqgB7HS2d1hEGjlnZfU806cgOJl2ko4zOnLo5CHmdp7LFcWvuORt3lLhFiZH\nT2bX0F2MbDqSbanbaDOtDTXfrMmYuDEcOnEoF5KHhvk75hM5IZIt+7fwyQOfML7NeIoVKuZ2LGPc\n58/Rd7xnX32Hd1TwlLNsADDAeS3AOGf9ZiAqu77O8rLAUuB7YAlwhbO8I7AV2AisB+7P1CcK2OJs\n6y1AfGUPlbOz+sf2V55FZ2yZkWf7OJV2Sj/Z8one/t7tyrPoZS9cpoMXDNZvU7/Ns30Gut2Hd+vj\nnz+uPIveMuEW/f7A925HMiZf4OfZWeJtG7qioqI0MTHR7R
iX5J3EdxiwYABP3vnkRR0HuRSJuxN5\nM/5Npm+Zzqn0U7S8riWPNnyUFte1IEwC4lBarjtw7ACJuxNJ3J1Iwu4EEncnknLEO2M6qMEgXrn3\nFRt9mAJDRNapapTPdlZEAtvqX1bTZEoTml3TjPld5+foOMil2Hd0HxPXTWR84nj2HN1DjStqMKTh\nEHpF9grqW7keOXmE9XvWk7A74c+CsfP3nX+uv77s9URVjKJBxQbcefWd1K9Y38W0xuQ/KyKOYC4i\nKYdTqD+xPqWKliL+4XjKFC/jWpZT6af4dNunjI0fS1xyHKWKlKJPZB8GNxxMjbI1XMvlj+Onj7Np\n3yYSUv5bML799VvUOSGw6uVVaVCpAQ0qNiCqYhT1K9Tn8mKXu5zaGHdZEXEEaxE5mXaSuz+4m62p\nW4nrGxdQ12CKT4nnzfg3+WTLJ6RlpNG6RmuGNBxC82ubuz7VdTr9NFv2b/mzWCTsTmDL/i2kZaQB\ncNVlV/1ZLM48R5QMnTP4jMktVkQcwVhEVJWHYx9m8sbJfNrpUzrc2MHtSOe158ge3ln3DhMSJ7Dv\nj33cUO4GhjQcQs+be+bLfTPSM9LZcWAHCSn/LRgb927kZPpJAMoUK/NnsWhQyVswKpWqZFfUNcYP\nVkQcwVhE3k54m0ELB/F046d5vunzbsfx6WTaSWZum8mYtWNI3J1I6aKl6XtLXwY1GMS1V1ybK/tQ\nVXb+vvPPYpGwO4H1e9Zz9NRRAEoWLkn9ivXPGmVcU+YaKxjG5JAVEUewFZGVP6+kaUxTWlzbgtiu\nsa5PD10MVWVtylrGrh3LzG0zSc9Ip03NNjx666PcU/0ev3+hqyopR1K8BSMlgcQ93jOmfjv+GwBF\nPUWJvCryrFHG9WWvz/eTDowJZVZEHMFURHYd2kXUu1GEFwtn7cNrCS8W7nakHNt9ZDcTEicwIXEC\nqcdSqRVRiyENh9Djph7nXCI99Y/Us06rTdidwN6jewHwiIc65eucNSVVp3wdiniKuPGxjCkwrIg4\ngqWInEg7QeP3G7Pj1x2sfXgtN0bc6HakXHEi7QQzts5gzNoxrN+znvBi4fS9pS/lS5b3TkulJPDz\noZ8BEITry11/1pRU5FWRdmFDY1xgRcQRDEVEVekzrw9TNk1hbue5RN+Q9Ur7wU9VWbNrDWPjx/Lp\ntk9J13Sqh1c/a0qqXoV6Qf3dE2NCib9FpFB+hDHZeyv+LaZsmsK/7v5XSBYQABHhjqvv4I6r7yD1\nj1REhHIlyrkdyxhziayIuGz5T8sZumgoba9vyz/v/qfbcfKFfS/DmNARPKf+hKBfDv3CgzMfpEbZ\nGnzY/sOgOhPLGGPAiohrjp8+TvtP2nMq/RRzO8+1YwHGmKBk01kuUFX6/6c/G/ZsILZrLNeXu97t\nSMYYkyNWRFwwZu0YPvrmI577y3O0qdnG7TjGGJNjNp2Vz7788Uv+8cU/aH9De5666ym34xhjzCWx\nIpKPfjr4E51mduL6ctczpd0UO5BujAl6fv0WE5GWIrJDRJJEZPh51ouIjHXWfyMi9Xz1FZErRGSx\niHzvPJdxljcXkXUistl5bpqpz3JnWxudR/lL+/j559jpY7T/pD1pGWnM7TyXUkVLuR3JGGMumc8i\nIiIevPdPbwXUArqKSK0szVoBNZxHf2C8H32HA0tVtQbee62fKTC/4r2vel2gF/Bhln11V9VI57H/\nYj6sW85c2n3T3k1M6zgt4G/iZIwx/vJnJNIQSFLVnap6CpgOZP1adTQQ49zfPQ4IF5EKPvpGA1Oc\n11OAdgCqukFVdzvLtwLFRaRoDj9fQHj161eZtmUaI5qOoFWNVm7HMcaYXONPEakE7Mr0PtlZ5k+b\n7Ppeqap7nNd7gSvPs++OwHpVPZlp2RRnKusZCYKbRSz+YTHDlgzjgVoP8OSdT7odxxhjclVAHNlV\n71Ugz7oSpIjUBkYDj2
Ra3F1VawONnUeP821PRPqLSKKIJKampuZRat92/r6TzrM6UyuiFu9Hv283\nSDLGhBx/ikgKUCXT+8rOMn/aZNd3nzPlhfP85/ENEakMzAF6quoPZ5araorzfAT4GO902TlUdaKq\nRqlqVESEO9dp+uPUH7Sb3g5Fmdt5br7cLtYYY/KbP0UkAaghItVFpAjQBYjN0iYW6OmcpdUIOORM\nVWXXNxbvgXOc53kAIhIOLACGq+rqMzsQkUIiUs55XRhoA2y56E+cD1SVv8b+la2pW5necXqu3SLW\nGGMCjc9vrKtqmogMBhYBHmCyqm4VkQHO+gnAQqA1kAQcA/pk19fZ9Chghoj0BX4GOjnLBwPXAf8U\nkTOXtb0X+ANY5BQQD7AEePdSPnxeeWn1S8zYOoPRzUbT4roWbscxxpg8YzelymWfJ31O66mt6VS7\nE9M6TrPjIMaYoOTvTakC4sB6qEj6LYmun3al7pV1ea/te1ZAjDEhz4pILjl66ijtprcjTMKY03kO\nJYuUdDuSMcbkObuKby5QVXrP7c32X7ez6KFFXFPmGrcjGWNMvrAikgteXPUin27/lFeav0Kza5q5\nHccYY/KNTWddogXfLeDpL5+mW91uPH7b427HMcaYfGVF5BJ8d+A7us/uzs1X3cy7979rB9KNMQWO\nFZEcOnzyMO2mt6NQWCHmdJ5DicIl3I5kjDH5zo6J5ECGZtBrbi++O/AdX/T4gmrh1dyOZIwxrrAi\nkgMjVoxg7rdzeb3F6zSt3tR3B2OMCVE2nXWR5u+Yz7+W/4uHbnqIx259zO04xhjjKisiF+HbX7/l\noTkPUa9CPSa2mWgH0o0xBZ4VET8dOnGIdtPbUdRTlDmd51C8cHG3IxljjOvsmIgfMjSDHnN6kPRb\nEkt7LuXqy692O5IxxgQEKyJ++PfyfzP/u/mMbTmWu6vd7XYcY4wJGDad5cPcb+fy3Irn6HVzLwY3\nHOx2HGOMCShWRLKxLXUbPeb0oEHFBkxoM8EOpBtjTBZWRC7g4ImDtJvejhKFSzC782yKFSrmdiRj\njAk4fhUREWkpIjtEJElEhp9nvYjIWGf9NyJSz1dfEblCRBaLyPfOc5lM65502u8QkRaZltcXkc3O\nurGSR0OD9Ix0us/uzo8Hf2TWg7OoXLpyXuzGGGOCns8iIiIeYBzQCqgFdBWRWlmatQJqOI/+wHg/\n+g4HlqpqDWCp8x5nfRegNtASeNvZDs52+2XaV8uL/8j+qVWuFmNajqFx1cZ5tQtjjAl6/pyd1RBI\nUtWdACIyHYgGtmVqEw3EqPeG7XEiEi4iFYBq2fSNBv7i9J8CLAeGOcunq+pJ4EcRSQIaishPQGlV\njXO2FQO0Az7L0SfPhifMw8v3vpzbmzXGmJDjz3RWJWBXpvfJzjJ/2mTX90pV3eO83gtc6ce2kn3k\nAEBE+otIoogkpqamXviTGWOMuSQBcWDdGcFoLm5voqpGqWpUREREbm3WGGNMFv4UkRSgSqb3lZ1l\n/rTJru8+Z8oL53m/H9uqfJ7lxhhjXOJPEUkAaohIdREpgvegd2yWNrFAT+csrUbAIWeqKru+sUAv\n53UvYF6m5V1EpKiIVMd7AD3e2d5hEWnknJXVM1MfY4wxLvB5YF1V00RkMLAI8ACTVXWriAxw1k8A\nFgKtgSTgGNAnu77OpkcBM0SkL/Az0Mnps1VEZuA9+J4GDFLVdKfPQOADoDjeA+q5flDdGGOM/8R7\nOCJ0RUVFaWJiotsxjDEmqIjIOlWN8tUuIA6sG2OMCU5WRIwxxuRYyE9niUgq3mMuOVEO+DUX4+Sl\nYMoKwZU3mLJCcOUNpqwQXHkvNWtVVfX5HYmQLyKXQkQS/ZkTDATBlBWCK28wZYXgyhtMWSG48uZX\nVpvOMsYYk2NWRIwxxuSYFZHsTXQ7wEUIpqwQXHmDKSsEV95gygrBlTdfstoxEWOMMTlm
IxFjjDE5\nZkUkCxEpJiLxIrJJRLaKyL/dzuQPEfGIyAYR+Y/bWXwRkZ+cO1RuFJGAvpyAc2+cWSLyrYhsF5Hb\n3M50PiJyvfPzPPM4LCJ/dztXdkRkqPP/2BYRmSYiAXsPahF5zMm5NRB/riIyWUT2i8iWTMsuePfY\n3GRF5FwngaaqejMQCbR0LioZ6B4Dtrsd4iI0UdXIIDhdcgzwuareANxMgP6MVXWH8/OMBOrjvYbd\nHJdjXZCIVAIeBaJUtQ7ea+t1cTfV+YlIHbx3VG2I999AGxG5zt1U5/iAc+/0et67x+Y2KyJZqNdR\n521h5xHQB45EpDJwHzDJ7SyhREQuB+4C3gNQ1VOqetDdVH65B/hBVXP6Jdv8UggoLiKFgBLAbpfz\nXMiNwFpVPaaqacBXQAeXM51FVVcAv2VZHI33rrE4z+3yYt9WRM7DmRraiPceJ4tVda3bmXx4A3gC\nyHA7iJ8UWCIi60Skv9thslEdSAXed6YKJ4lISbdD+aELMM3tENlR1RTgFeAXYA/e20d84W6qC9oC\nNBaRsiJSAu8Vy6v46BMILnT32FxlReQ8VDXdmRaojPf+7nXcznQhItIG2K+q69zOchHudH6+rYBB\nInKX24EuoBBQDxivqrcAf5BHUwK5xblvT1tgpttZsuPMz0fjLdQVgZIi8pC7qc5PVbcDo4EvgM+B\njUB6tp0CTG7fPTYzKyLZcKYulnHuXGMguQNoKyI/AdOBpiLykbuRsuf8FYqq7sc7b9/Q3UQXlAwk\nZxqJzsJbVAJZK2C9qu5zO4gPzYAfVTVVVU8Ds4HbXc50Qar6nqrWV9W7gN+B79zO5IcL3T02V1kR\nyUJEIkQk3HldHGgOfOtuqgtT1SdVtbKqVsM7jfGlqgbkX3QAIlJSREqdeQ3ci3e6IOCo6l5gl4hc\n7yy6B+/N0gJZVwJ8KsvxC9BIREo4dyq9hwA9aQFARMo7z1fjPR7ysbuJ/HKhu8fmKp93NiyAKgBT\nRMSDt8jOUNWAP202iFwJzPH+3qAQ8LGqfu5upGwNAaY600Q7ce7aGYicotwceMTtLL6o6loRmQWs\nx3sH0w0E9rfBPxWRssBpvHdbDagTLERkGvAXoJyIJAP/4gJ3j831fds31o0xxuSUTWcZY4zJMSsi\nxhhjcsyKiDHGmByzImKMMSbHrIgYY4zJMSsixhhjcsyKiDHGmByzImKMMSbH/h+oXlrgMRIrHAAA\nAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xdc7b6a0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the scores collected in v_scores against Ks as a green line.\n",
    "# NOTE(review): Ks / v_scores come from an earlier cell; presumably Ks holds\n",
    "# the candidate cluster counts -- confirm against that cell.\n",
    "plt.plot(Ks, np.array(v_scores), 'g-')\n",
    "plt.xlabel('K')\n",
    "plt.ylabel('v_score')\n",
    "# plt.show() renders the figure without echoing the Line2D repr.\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 跟CH_score趋势相差很大，但v_score随后呈上升趋势"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH05JREFUeJzt3X10VdW57/HvkxcCCchrBCQJUOCo4HtTiget7wKVAsPR\nMvAMW2qj3tND1YNVq7enVcc4HKr31pe2UqFKyziKXLTHwrWnKnLEg1qhQbSKwAUFJIgJlvf3vDz3\nj7X2zk5IYJNkk529fp+OjDXXXHPtPafa9ew151xzmbsjIiLRk9XeFRARkfahACAiElEKACIiEaUA\nICISUQoAIiIRpQAgIhJRSQUAM+thZi+Y2TozW2tmF5tZLzNbYmYbwm3PhPL3mdlGM1tvZmNSV30R\nEWmpZO8AHgdedvezgPOBtcC9wFJ3HwYsDfcxs+HAFGAEMBaYZWbZbV1xERFpnRMGADPrDnwNeBrA\n3Y+6+25gIjAvLDYPmBSmJwIL3P2Iu28CNgIj27riIiLSOjlJlBkM7AB+a2bnA6uAO4C+7r49LPM5\n0DdMDwDeSTi/IsxrwMxuBW4FKCgo+PJZZ53VogaIiETVqlWrvnD3wpaen0wAyAEuAm5z9xVm9jhh\nd0+Mu7uZndSaEu4+B5gDUFpa6uXl5SdzuohI5JnZltacn8wYQAVQ4e4rwv0XCAJCpZn1DyvRH6gK\nj28DihPOLwrzREQkjZwwALj758BWMzszzLoK+AhYDEwN86YCi8L0YmCKmeWZ2WBgGLCyTWstIiKt\nlkwXEMBtwLNm1gn4BLiJIHgsNLMyYAswGcDd15jZQoIgUQNMc/faNq+5iIi0SlIBwN3fA0qbOHRV\nM+VnADNaUS8REUkxPQksIhJRCgAiIhGlACAiElEKACIiEaUAICISUQoAIiIRpQAgIhJRCgAiIhGl\nACAiElEKACIiEaUAICISUQoAIiIRpQAgIhJRCgAiIhGlACAiElEKACIiEaUAICISUQoAIiIRpQAg\nIhJRCgAiIhGlACAiElEKACIiEaUAICISUQoAIiIRpQAgIhJRCgAiIhGlACAiElFJBQAz22xmH5jZ\ne2ZWHub1MrMlZrYh3PZMKH+fmW00s/VmNiZVlRcRkZY7mTuAK9z9AncvDffvBZa6+zBgabiPmQ0H\npgAjgLHALDPLbsM6i4hIG2hNF9BEYF6YngdMSshf4O5H3H0TsBEY2YrvERGRFEg2ADjwmpmtMrNb\nw7y+7r49TH8O9A3TA4CtCedWhHkiIpJGcpIsd4m7bzOz04ElZrYu8aC7u5n5yXxxGEhuBSgpKTmZ\nU0VEpA0kdQfg7tvCbRXwIkGXTqWZ9QcIt1Vh8W1AccLpRWFe48+c4+6l7l5aWFjY8haIiEiLnDAA\nmFmBmXWLpYFrgQ+BxcDUsNhUYFGYXgxMMbM8MxsMDANWtnXFRUSkdZLpAuoLvGhmsfLz3f1lM/sL\nsNDMyoAtwGQAd19jZguBj4AaYJq716ak9iIi0mInDADu/glwfhP5fwOuauacGcCMVtdORERSRk8C\ni4hElAKAiEhEKQCIiESUAoCISEQpAIiIRJQCgIhIRCkAiIhElAKAiEhEKQCIiESUAoCISEQpAIiI\nRJQCgIhIRCkAiIhElAKAiEhEKQCIiESUAoCISEQpAIiIRJQCgIhIRCkAiIhElAKAiEhEKQCIiESU\nAoCISEQpAIiIRJQCgIhIRCkAiIhElAKAiEhEKQCIiESUAoCISEQlHQDMLNvMVpvZS+F+LzNbYmYb\nwm3PhLL3mdlGM1tvZmNSUXEREWmdk7kDuANYm7B/L7DU3YcBS8N9zGw4MAUYAYwFZplZdttUV0RE\n2kpSAcDMioDrgKcSsicC88L0PGBSQv4Cdz/i7puAjcDItqmuiIi0lWTvAB4D7gHqEvL6uvv2MP05\n0DdMDwC2JpSrCPMaMLNbzazczMp37N
hxcrUWEZFWO2EAMLPxQJW7r2qujLs74Cfzxe4+x91L3b20\nsLDwZE4VEZE2kJNEmdHABDP7OtAZOM3MngEqzay/u283s/5AVVh+G1CccH5RmCciImnkhHcA7n6f\nuxe5+yCCwd3/cvcbgcXA1LDYVGBRmF4MTDGzPDMbDAwDVrZ5zUVEpFWSuQNozs+AhWZWBmwBJgO4\n+xozWwh8BNQA09y9ttU1FRGRNmVB9337Ki0t9fLy8vauhohIh2Jmq9y9tKXn60lgEZGIUgAQEYko\nBQARkYhSABARiSgFABGRiFIAEBGJKAUAEZGIUgAQEYkoBQARkYhSABARiSgFABGRiFIAEBGJKAUA\nEZGIUgAQEYkoBQARkYhSABARiSgFABGRiFIAEBGJKAUAEZGIUgAQEYkoBQARkYhSABARiSgFABGR\niFIAEBGJKAUAEZGIUgAQEYkoBQARkYg6YQAws85mttLM3jezNWb2YJjfy8yWmNmGcNsz4Zz7zGyj\nma03szGpbICIiLRMMncAR4Ar3f184AJgrJmNAu4Flrr7MGBpuI+ZDQemACOAscAsM8tOReVFRKTl\nThgAPLA/3M0N/xyYCMwL8+cBk8L0RGCBux9x903ARmBkm9ZaRERaLakxADPLNrP3gCpgibuvAPq6\n+/awyOdA3zA9ANiacHpFmNf4M281s3IzK9+xY0eLGyAiIi2TVABw91p3vwAoAkaa2TmNjjvBXUHS\n3H2Ou5e6e2lhYeHJnCoiIm3gpGYBuftu4HWCvv1KM+sPEG6rwmLbgOKE04rCPBERSSPJzAIqNLMe\nYboLcA2wDlgMTA2LTQUWhenFwBQzyzOzwcAwYGVbVzxtLFsGDzwAO3e2d01ERE5KThJl+gPzwpk8\nWcBCd3/JzP4MLDSzMmALMBnA3deY2ULgI6AGmObutampfjtatiz4O3QIHn4YbrwRevVq71qJiCTN\ngu779lVaWurl5eWp+fC6OrjnHnjmGTh6FG64AfLyYPZsqKyEggIoKYGLL4YFC44t+/jjUFEBgwfD\ntdfCp58Gf2Zw4EDD77rssiAoiIicAma2yt1LW3p+5j4JXFcHd90FPXvCz38Ow4fD174Gs2ZBUREc\nPAgDB8IVVwQX+Lq6hmXnzg3K5ubCd74TfObSpXDzzcG5jS/+y5fD88+f+naKiLRQZt4B1NTA0KGw\nZUvTxy+6CN59N0jn5QXlaxv1UuXkBPmNdeoU3B00tmFD8J0iIqeI7gCa8p3vNH/xB1i/vj595Mix\nF38Ifvk3pamLP8DatbBrV/J1FBFpZ5kVAGLdPs89d/xyjbtvmnLoUHLfOX48FBfDhAlw993JnSMi\nkgaSmQWU3urqggvvk08GffOnQlZW8L0AL70EkycHA8MiIh1Ix78DmDsXHnkkuPife+7xy5q1zXd2\n6dJw/7332uZzRUROoY4bAGLdPbffXp/3wQfHP6etBrzHjatP5+Y2HFMQEekgOl4AaDy9s6Sk/tio\nUSf3WQUFJy7Tr199+uGHg/7+F16AsrIgoDQ3KCwikuY6TgCIXfh79Agu/N26Bfnr19d37bzzTn35\n008/8WfGBoObmvGTFf6jmTsXBoSLmd5zT/AwmDs89VTL2iEikiY6TgCYOze48JeGU163Jawvd9ZZ\ncMYZQTo2DlBVBYMGNf1ZsV/1sQv/6afDaacF6dhdwZ13BtuamqBryV0XfhHJKB0nAKxeHWzfeqs+\nr6go+KW+bh3s2RPkbdlS/+v9s88alo1d3KurG27N6u8iYucOGqTpnSKS0dI3AMS6fPr1CxZZW7Om\n/tiECUEXUEUFdO4c5MX64vfuha98JUjfc0+wzc4O7hgOHw72L7204Xft3VsfAGbODMYVfvADdfeI\nSEZL3wDw1FNBl8+hQ8HF/Y03gl/kR4/Cn/5U38d/wQXB8gzV1dC1a/Arf8UKuPxyeOKJoIx7EEhi\nc/
cbT+McMQLefDP41f+DH8A11+jCLyIZL30DQOxp3sSlGmIX/bPOCv4AfvKTYCVPCIJArOy+fbB4\ncZDu3RteeSXo8+/UCX772/oZQ889B4sWBUFg0yZd+EUkMtI3AMTm7McGXyEYqB06FD78EF59NdiP\nddM0VTZm6NBgcDgvLwgQeXn14wF79gQLv4mIREz6BoApU4JtXl6wLSqCiRPhhz8MLvLV1XDTTcGg\nbXNlY7KaaOZ55wXbf/zHhg+TiYhERPoGgFtugenTIT8/uID36RMM7n7728EAsFlw/Hhli4qC45df\nHmzPO68+r6ysfppoLHCIiERIZr4PQEQkAvQ+ABERaREFABGRiFIAEBGJKAUAEZGIUgAQEYkoBQAR\nkYhSABARiSgFABGRiFIAEBGJqBMGADMrNrPXzewjM1tjZneE+b3MbImZbQi3PRPOuc/MNprZejMb\nk8oGiIhIyyRzB1AD/NDdhwOjgGlmNhy4F1jq7sOApeE+4bEpwAhgLDDLzLJTUXkREWm5EwYAd9/u\n7u+G6X3AWmAAMBGYFxabB0wK0xOBBe5+xN03ARuBkW1d8Q5j2TJ44AHYubO9ayIi0sBJjQGY2SDg\nQmAF0Nfdt4eHPgf6hukBwNaE0yrCvMafdauZlZtZ+Y4dO06y2h3IsmXw4IMKACKSdpIOAGbWFfg9\n8M/uvjfxmAdLip7UsqLuPsfdS929tLCw8GRObVuN3z08bRrceWfwwpj9+4N3DxQXw+TJTZetqYHN\nm4PlqceMgbPPDt5A9sIL9Rd/gGHD6pelFhFJA0m9CsvMcgku/s+6+3+E2ZVm1t/dt5tZf6AqzN8G\nFCecXhTmpae5c4N3D8+dG7xHYMIE+NWv4OBBePHF4OJfUQE339x02eHD4brrgs+qrISFC2HcuODl\n8m+/Hbyn4NFHYflyOPPM9m2riEiCZGYBGfA0sNbdH0k4tBiYGqanAosS8qeYWZ6ZDQaGASvbrspt\nbPXqYHvbbXDDDUH6rbdg9Gh49lmYPx9KSuDqq5svGzN2bPDqyYEDgy6fvLz6V1P26wfteacjItJI\nMncAo4FvAx+Y2Xth3v8EfgYsNLMyYAswGcDd15jZQuAjghlE09y9ts1r3lbOPz/YzpgRdN988knw\nDuGtW4M3jRUUBL/is7KaLxsTe7ewWf27ibt0CbZr1wZdQz17IiKSDk4YANz9TcCaOXxVM+fMAGa0\nol6nTlkZrFsXdNns3Rt00zzxRNDnf8cdsG9f8O7h45U9nvHj4ckng+6isjJ46qnUt0lEJAl6JaSI\nSAelV0KKiEiLKACIiESUAoCISEQpAKQrLSEhIimmAJCutISEiKSYAkBLaQkJEengkloKQpqgJSRE\npINTAGipxGUhYhKXkCgpqV9CIlamcdlYAEhcQuKzz7SEhIicEgoALaUlJESkg9MYQEuVlQUX+Jkz\nYdIkmD0b8vODPv/8/GOXkGiq7PGMHw+DBgXdRXffnfLmiEj0aCkIEZEOSktBiIhIiygAiIhElAKA\niEhEKQCIiESUAoCISEQpAIiIRJQCQEQt27yMB5Y9wM5DWmxOJKoUACJq2eZlPPjGgwoCIhGmpSDS\nVJ3Xcc+Se3jmr89wtPYoN5xzA3k5ecxeNZvKuyopyC2g5LESLi66mAXfXHBM2cfHPU7F3goGPz6Y\na4dcy6d7PqXqQBWzx8+mT34fHnwjWG30lyt/yYptK1hx84p2brGInGq6A0hTc1fP5ed//jkX9LsA\nd2dW+SzKPyvnYPVBej/cm5c3vkzF3gpe3/w6I38z8piyV867kpq6GgBe/fhVenbuyd4je5nywhS2\n79vO8D7D49+1cttKLv/d5e3UUhFpL7oDSFPvbn8XgFc+fiWet/zT5RjG0dqjPPTWQ2RZFl8c/IJR\nA0Y1WfaJlU80+Dx3x3HuX3Y/G3ZuiB87u8/ZPP+t51PdJBFJM7oD
SFP7j+5vMt8J1m5a/uly6ryO\nLMvir1V/bbLs+5Xvx9N98vtQ67XUeR27Du/inMJz4sfWfrGWbz3/rTasvYh0BAoAaaogt6DJ/NML\nTgeCMYLY9tzCc5ss27NL/RLS1XXV5OcGK5DuPbyXvy/++/ix0jNK+c03fgPUzw5avH5xkwPEmj0k\nkjkUANLUhf0vbDK/T36fBvsDug1g/Jnjmyxbtb8qnh7ScwgX9rsQw6j1Wr7U80vxY+WflfPQWw8B\n9bODXvvktSZnCcWOKwCIdHxaDjpN1dbVMuaZMSzdtJTcrFyKuxdz/dnXc2nJpVz/f66n1msByMnK\nIduyOVJ7pMH5WWTh4f/ieZZFnddhGI6TRRZ1BHcShpGTlUN1XfUxdcm2bBZ8cwF98vtwxbwr4vmX\nDbyMZd9dloLWi0gyUr4ctJnNNbMqM/swIa+XmS0xsw3htmfCsfvMbKOZrTezMS2tWNRlZ2Xzyo2v\nMH3UdHp16cX2fdt57ZPXeHvr29R6LTlZwfh9TV0NWRb8a+zZub7Lp2unrg0u/kCDi3/j8mbW5MUf\noNZr+Yff/wNn9j6T6aOmA7D8puUaOBbp4JLpAvodMLZR3r3AUncfBiwN9zGz4cAUYER4ziwzy26z\n2kZMdlY2j4x5hM/v+pyDPz7I6v+xmn1H9h1TLvbrf9fhXeRm5QInHkQGyM3OjadjYwpNKcwvpLqu\nmgPVB+JdP51zOlNYoHcVi3RkJwwA7v7fQOMO34nAvDA9D5iUkL/A3Y+4+yZgIzCyjeoqwPn9gvcL\nX3/W9fG8bMvGMID4r/hY1w7ApcWXNviMWNnKA5XxvM7ZnZv9zh0HdwAw7JfDWLR+EQDzP5jPj5b8\nSGMBIh1YSweB+7r79jD9OdA3TA8AtiaUqwjzpI2UXVjG9FHTWbZ5WfxCXlNXg+NkN3GzZRh/rvhz\ng7zYXUBp//quw8O1hxuc05RfjP0Fuw/vBuDRdx7l4bcf1owgkQ6s1bOAPBhFPumRZDO71czKzax8\nx44dra1GZMS6hSrvrqT6J9VMHzWdzjnBr3fH411AsXGBxIHgktNK4p9jGH/Z/pf4/uDug+Pp2HTR\n2DhDzE9e/wnXDbuuQd4vV/6Scc+O0/TQFNE/V0mllgaASjPrDxBuY/MNtwHFCeWKwrxjuPscdy91\n99LCQvUlt0QsGDw29jEAHrn2EcYMCcbdYwEAiM8Y2nt0bzzv5otubvBZm/ZsCs4jiwPVBwDigSVm\n39F9/HHDHwEYO6R+WGjltpV89w/f1eJyKaBpt5JKSU0DNbNBwEvufk64/7+Av7n7z8zsXqCXu99j\nZiOA+QT9/mcQDBAPcw+vQM3QNNDWqa2r5e4ldzP/g/nsPLST6rpqzjv9vGafEIb6KaEAA7sPZMue\nLSf1nd06dWPf0WBA+hdjf8HtL98eP3ZR/4tYdeuqFrSk40n1on2adivHcyqmgT4H/Bk408wqzKwM\n+BlwjZltAK4O93H3NcBC4CPgZWDaiS7+0nqJs4UO/fgQ00dNZ9PuTfHjnbI6HTM+UOd18aeNm7v4\nd8npEk9nWRZf7v/l+H7s4g+w58gevjn8m/H9d7e/G5nF5WKL9s28aibzJs1jVvkshvQcwsHqg7y4\n9kXe2PIGFXsruPmim5ssO7t8dvyzKvdXsvCbC+mS04WZb87k4qKLNe1WUiqZWUA3uHt/d8919yJ3\nf9rd/+buV7n7MHe/2t13JpSf4e5D3P1Md/9TaqsvjcWCwa4f7eKqwVcBwThAQadjl5Y4UH2AvOw8\nALrnda//jDBYHKo5FM+r8zqqDtQ/WZyfkx9Pz1g+g6xG/ykdrT3aBq1Jf6u3rwbgtj/dxg2/vwGA\nt7a+xeji0Tz7wbPM/2A+Jd1LuPpLVzdbNmbs0LGc2/dcBvYYyM5DO8nLyeO0vNMA6Ne1n6bdSpvT\naqAZKvYgWWLXUExuVm58umif
/D5s27eNfUf30SOvB7uP7G7wOTlZOfFlpbvndWdrOMmroFMBJT1K\nWPfFOg7XHObjXR/Hz/nNN37DxDMnprqJaSE2LXfGlTM4u/BsPtn1CUN7DWXrnq3c8n9voaBTAdNH\nTSfLspotGxMbdDeMWNds7C5s7Y619O7Su8H6TiKtpbWAMljjrqEB3YIZudV11fGpnkN6DsEw6ryO\n/Uf3c1reaQ3K1tTVxMt27dQ1/tlDew2ld5feGEa2ZTNv0rz4oPHv3vvdMTOIMlVsWu7MN2cyacEk\nZq+aTX5uPpNHTCY/N599R/Zx0wU3Hbfs8Yz/u/EM6jGICQsmcPeSu09FkyRCtBZQhDxZ/iTf/+P3\n6dapG0drj1JYUMhdF99FXnYe3//P7wNwx1fv4LGxjzVb9stnfJlLf3spo4tH8+b33uS0madxsPog\nNT+t4c6X7+TRFY8CcOO5N/Lv1/97ezZXJOOlfBBYMsctF93C9FHTyc/NJ8uy6JPfh68M+ArfPv/b\ndOvUDcPig47NlS06rQiAywddDsB5fc+L55VdVMagHoMAyMvJO+XtE5GTozsAEZEOSncA0mHpKVeR\n9qUAIO1GT7mKtC8FAGlWnddx16t30e9/96PXQ72Y9sdp3PnKnRT8WwH7j+7H3Sl+tJjJz09usmxN\nXQ2bd2/GHjTGPDOGs584m94P9+aFj16IX/whWGU0Kg+OiaQTBQBplp5yFcls0ZisLS2S+ORqTOJT\nriXdS+JPud72n7c1Wfa6vwtWD018yvWzfZ/pKVeRNKAAIM3SU67JWbZ5Gcs2L+P2r95Ory692rs6\nIklTF5A0S0+5JkeD2dJR6TkAyXhtuWRz6RmlbN69mdq6WuZ8Y46WbJZ21drnANQFJBkvNkA9d8Jc\n+uT3YcKCCfxq3K/ig9nF3YuPGcxOLDu8cHh8LGPrnq18cfAL+hb0ZeabM3n7e28zfdR0Hn3nUZbf\ntJwze5/Zzq0VSZ4CgGSUpn7tx5ZcThygfuithxhdPJqfvv5TKg9UkkUWf1j7B9yCO+LvLf5efFns\nO1+9E7NgQbzKA5Xx7Z7DezSYLR2axgAkozQ1HfXsPmcDMOmsSdx/2f0A/NNX/olhvYaxec9mACaP\nmMyvV/2a3Yfql8Me2H0g/3Lpv9CtUzdm/WUWACMHjATg3NPPjV/sEwezdx3adUraKdIWFAAkozT1\n0hXHOaPrGTz/0fPct/Q+crNyuaTkEnKzcoHgxTeL1i+Kly27sAyAT/d+yksbXqJ/t/7sPhwEhtgL\ndDpld4q/dzlTBrMletQFJBmluemoY4aMaTB19ZKSS3hx7YtB2StmUDqgNF52aK+hPL36aX40+kf8\n65X/yiVzL2HfkX34/c5Dbz7E8k+Xc/9l93NJySUAjDh9BJvu2NRsnUTSle4AJKOczNTV2GsyH3nn\nkchNXRUBTQOVNNaW0zevHXItn+75lKoDVcweP1vTNyUjaDloyVhai0gktTQGIGlLaxGJpJYCgKQt\nrUUkklrqApK0pbWIRFJLg8AiIh2UBoFFRKRFFABERCJKAUBEJKJSFgDMbKyZrTezjWZ2b6q+R0RE\nWiYlAcDMsoEngHHAcOAGMxueiu8SEZGWSdUdwEhgo7t/4u5HgQXAxBR9l4iItECqHgQbAGxN2K8A\nvppYwMxuBW4Nd4+Y2Ycpqks66AN80d6VSCG1r2PL5PZlctsAWvUKunZ7Etjd5wBzAMysvDVzWdOd\n2texqX0dVya3DYL2teb8VHUBbQOKE/aLwjwREUkTqQoAfwGGmdlgM+sETAEWp+i7RESkBVLSBeTu\nNWb2A+AVIBuY6+5rjnPKnFTUI42ofR2b2tdxZXLboJXtS4u1gERE5NTTk8AiIhGlACAiElHtHgAy\nYckIM5trZlWJzzKYWS8zW2JmG8Jtz4Rj94XtXW9mY9qn1skxs2Ize93MPjKzNWZ2R5ifKe3rbG
Yr\nzez9sH0PhvkZ0T4Insw3s9Vm9lK4nzFtAzCzzWb2gZm9F5sWmSltNLMeZvaCma0zs7VmdnGbts3d\n2+2PYID4Y+BLQCfgfWB4e9aphe34GnAR8GFC3sPAvWH6XuChMD08bGceMDhsf3Z7t+E4besPXBSm\nuwH/L2xDprTPgK5hOhdYAYzKlPaFdb4TmA+8lEn/bSa0bzPQp1FeRrQRmAfcHKY7AT3asm3tfQeQ\nEUtGuPt/AzsbZU8k+JdHuJ2UkL/A3Y+4+yZgI8E/h7Tk7tvd/d0wvQ9YS/Ckd6a0z919f7ibG/45\nGdI+MysCrgOeSsjOiLadQIdvo5l1J/hx+TSAux919920YdvaOwA0tWTEgHaqS1vr6+7bw/TnQN8w\n3WHbbGaDgAsJfiVnTPvCLpL3gCpgibtnUvseA+4B6hLyMqVtMQ68ZmarwiVmIDPaOBjYAfw27MJ7\nyswKaMO2tXcAiAQP7s869HxbM+sK/B74Z3ffm3iso7fP3Wvd/QKCJ9ZHmtk5jY53yPaZ2Xigyt1X\nNVemo7atkUvCf3/jgGlm9rXEgx24jTkEXcu/dvcLgQMEXT5xrW1beweATF4yotLM+gOE26owv8O1\n2cxyCS7+z7r7f4TZGdO+mPD2+nVgLJnRvtHABDPbTNC9eqWZPUNmtC3O3beF2yrgRYJuj0xoYwVQ\nEd6RArxAEBDarG3tHQAyecmIxcDUMD0VWJSQP8XM8sxsMDAMWNkO9UuKmRlBH+Rad38k4VCmtK/Q\nzHqE6S7ANcA6MqB97n6fuxe5+yCC/2/9l7vfSAa0LcbMCsysWywNXAt8SAa00d0/B7aaWWzFz6uA\nj2jLtqXBKPfXCWaWfAz8uL3r08I2PAdsB6oJonYZ0BtYCmwAXgN6JZT/cdje9cC49q7/Cdp2CcEt\n5l+B98K/r2dQ+84DVoft+xD4aZifEe1LqPPl1M8Cypi2EcwgfD/8WxO7hmRKG4ELgPLwv88/AD3b\nsm1aCkJEJKLauwtIRETaiQKAiEhEKQCIiESUAoCISEQpAIiIRJQCgIhIRCkAiIhE1P8H5YIFjxhC\n29sAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xc66c9e8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "colors = ['r','g']\n",
    "\n",
    "\n",
    "n_clusters = 2\n",
    "mb_kmeans = MiniBatchKMeans(n_clusters = n_clusters)\n",
    "X_cluster=mb_kmeans.fit_transform(X_train)\n",
    "\n",
    "y_train_pred = mb_kmeans.labels_\n",
    "\n",
    "cents = mb_kmeans.cluster_centers_# 质心\n",
    "\n",
    "for i in range(n_clusters):\n",
    "    index = np.nonzero(y_train_pred==i)[0] # 0,1分类\n",
    "    \n",
    "    x1 = X_cluster[index,0] # \n",
    "    \n",
    "    x2 = X_cluster[index,1] #\n",
    "    y_i = 'event'\n",
    "    for j in range(len(x1)):\n",
    "        if j < 20:\n",
    "            plt.text(x1[j],x2[j],str(y_i),color=colors[i],\\\n",
    "            fontdict={'weight':'bold','size':9})\n",
    "plt.axis([0,600,0,600])\n",
    "plt.show()\n"
   ]
  },
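  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 这里的xy坐标取值\n",
    "\n",
    "（之前没弄懂，补充说明如下。）`fit_transform` 返回的不是原始特征，而是每个样本到各聚类中心的距离，所以图中的 x 是样本到第 0 个质心的距离，y 是样本到第 1 个质心的距离。"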
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
